{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 聚类\n",
    "\n",
    "熟悉各中聚类算法的调用\n",
    "并用评价指标选择合适的超参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#导入必要的工具包\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.cluster import MiniBatchKMeans\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn import metrics\n",
    "\n",
    "from sklearn.decomposition import PCA\n",
    "import time\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>event_id</th>\n",
       "      <th>user_id</th>\n",
       "      <th>start_time</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>zip</th>\n",
       "      <th>country</th>\n",
       "      <th>lat</th>\n",
       "      <th>lng</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>684921758</td>\n",
       "      <td>3647864012</td>\n",
       "      <td>2012-10-31T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>244999119</td>\n",
       "      <td>3476440521</td>\n",
       "      <td>2012-11-03T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>3928440935</td>\n",
       "      <td>517514445</td>\n",
       "      <td>2012-11-05T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2582345152</td>\n",
       "      <td>781585781</td>\n",
       "      <td>2012-10-30T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>1051165850</td>\n",
       "      <td>1016098580</td>\n",
       "      <td>2012-09-27T00:00:00.001Z</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 111 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0    event_id     user_id                start_time city state  \\\n",
       "0           0   684921758  3647864012  2012-10-31T00:00:00.001Z  NaN   NaN   \n",
       "1           1   244999119  3476440521  2012-11-03T00:00:00.001Z  NaN   NaN   \n",
       "2           2  3928440935   517514445  2012-11-05T00:00:00.001Z  NaN   NaN   \n",
       "3           3  2582345152   781585781  2012-10-30T00:00:00.001Z  NaN   NaN   \n",
       "4           4  1051165850  1016098580  2012-09-27T00:00:00.001Z  NaN   NaN   \n",
       "\n",
       "   zip country  lat  lng   ...     c_92  c_93  c_94  c_95  c_96  c_97  c_98  \\\n",
       "0  NaN     NaN  NaN  NaN   ...        0     1     0     0     0     0     0   \n",
       "1  NaN     NaN  NaN  NaN   ...        0     0     0     0     0     0     0   \n",
       "2  NaN     NaN  NaN  NaN   ...        0     0     0     0     0     0     0   \n",
       "3  NaN     NaN  NaN  NaN   ...        0     0     0     0     0     0     0   \n",
       "4  NaN     NaN  NaN  NaN   ...        0     0     0     0     0     0     0   \n",
       "\n",
       "   c_99  c_100  c_other  \n",
       "0     0      0        9  \n",
       "1     0      0        7  \n",
       "2     0      0       12  \n",
       "3     0      0        8  \n",
       "4     0      0        9  \n",
       "\n",
       "[5 rows x 111 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#读取训练数据\n",
    "origin_data = pd.read_csv('clusterdata_for_cumtzd.csv')\n",
    "origin_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>c_1</th>\n",
       "      <th>c_2</th>\n",
       "      <th>c_3</th>\n",
       "      <th>c_4</th>\n",
       "      <th>c_5</th>\n",
       "      <th>c_6</th>\n",
       "      <th>c_7</th>\n",
       "      <th>c_8</th>\n",
       "      <th>c_9</th>\n",
       "      <th>c_10</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 101 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   c_1  c_2  c_3  c_4  c_5  c_6  c_7  c_8  c_9  c_10   ...     c_92  c_93  \\\n",
       "0    2    0    2    0    0    0    0    0    0     0   ...        0     1   \n",
       "1    2    0    2    0    0    0    0    0    0     0   ...        0     0   \n",
       "2    0    0    0    0    0    0    0    0    0     0   ...        0     0   \n",
       "3    1    0    2    1    0    0    0    0    0     0   ...        0     0   \n",
       "4    1    1    0    0    0    0    0    2    0     0   ...        0     0   \n",
       "\n",
       "   c_94  c_95  c_96  c_97  c_98  c_99  c_100  c_other  \n",
       "0     0     0     0     0     0     0      0        9  \n",
       "1     0     0     0     0     0     0      0        7  \n",
       "2     0     0     0     0     0     0      0       12  \n",
       "3     0     0     0     0     0     0      0        8  \n",
       "4     0     0     0     0     0     0      0        9  \n",
       "\n",
       "[5 rows x 101 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train = origin_data.drop([\"event_id\",\"user_id\",\"start_time\",\"city\",\"state\",\"zip\",\"country\",\"lat\",\"lng\"],axis=1)\n",
    "train=train.drop(train.columns[0],axis=1)\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>c_1</th>\n",
       "      <th>c_2</th>\n",
       "      <th>c_3</th>\n",
       "      <th>c_4</th>\n",
       "      <th>c_5</th>\n",
       "      <th>c_6</th>\n",
       "      <th>c_7</th>\n",
       "      <th>c_8</th>\n",
       "      <th>c_9</th>\n",
       "      <th>c_10</th>\n",
       "      <th>...</th>\n",
       "      <th>c_92</th>\n",
       "      <th>c_93</th>\n",
       "      <th>c_94</th>\n",
       "      <th>c_95</th>\n",
       "      <th>c_96</th>\n",
       "      <th>c_97</th>\n",
       "      <th>c_98</th>\n",
       "      <th>c_99</th>\n",
       "      <th>c_100</th>\n",
       "      <th>c_other</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "      <td>13418.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>2.359964</td>\n",
       "      <td>1.464972</td>\n",
       "      <td>1.323372</td>\n",
       "      <td>0.888732</td>\n",
       "      <td>1.159711</td>\n",
       "      <td>2.479654</td>\n",
       "      <td>0.986809</td>\n",
       "      <td>0.584439</td>\n",
       "      <td>0.660754</td>\n",
       "      <td>0.552914</td>\n",
       "      <td>...</td>\n",
       "      <td>0.064913</td>\n",
       "      <td>0.083992</td>\n",
       "      <td>0.093755</td>\n",
       "      <td>0.070502</td>\n",
       "      <td>0.082427</td>\n",
       "      <td>0.233790</td>\n",
       "      <td>0.082874</td>\n",
       "      <td>0.076837</td>\n",
       "      <td>0.073558</td>\n",
       "      <td>57.554777</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>19.331141</td>\n",
       "      <td>2.959769</td>\n",
       "      <td>2.720104</td>\n",
       "      <td>1.972209</td>\n",
       "      <td>15.695718</td>\n",
       "      <td>7.375475</td>\n",
       "      <td>18.371845</td>\n",
       "      <td>1.395134</td>\n",
       "      <td>1.630597</td>\n",
       "      <td>1.243510</td>\n",
       "      <td>...</td>\n",
       "      <td>0.309890</td>\n",
       "      <td>0.377730</td>\n",
       "      <td>0.388404</td>\n",
       "      <td>0.312148</td>\n",
       "      <td>0.503164</td>\n",
       "      <td>15.553234</td>\n",
       "      <td>0.356777</td>\n",
       "      <td>0.455338</td>\n",
       "      <td>0.337954</td>\n",
       "      <td>110.916584</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>14.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>38.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>3.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>75.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>2186.000000</td>\n",
       "      <td>82.000000</td>\n",
       "      <td>85.000000</td>\n",
       "      <td>71.000000</td>\n",
       "      <td>1801.000000</td>\n",
       "      <td>306.000000</td>\n",
       "      <td>2120.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>51.000000</td>\n",
       "      <td>...</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>23.000000</td>\n",
       "      <td>1801.000000</td>\n",
       "      <td>9.000000</td>\n",
       "      <td>16.000000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>9664.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8 rows × 101 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                c_1           c_2           c_3           c_4           c_5  \\\n",
       "count  13418.000000  13418.000000  13418.000000  13418.000000  13418.000000   \n",
       "mean       2.359964      1.464972      1.323372      0.888732      1.159711   \n",
       "std       19.331141      2.959769      2.720104      1.972209     15.695718   \n",
       "min        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "25%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "50%        1.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "75%        3.000000      2.000000      2.000000      1.000000      1.000000   \n",
       "max     2186.000000     82.000000     85.000000     71.000000   1801.000000   \n",
       "\n",
       "                c_6           c_7           c_8           c_9          c_10  \\\n",
       "count  13418.000000  13418.000000  13418.000000  13418.000000  13418.000000   \n",
       "mean       2.479654      0.986809      0.584439      0.660754      0.552914   \n",
       "std        7.375475     18.371845      1.395134      1.630597      1.243510   \n",
       "min        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "25%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "50%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "75%        2.000000      1.000000      1.000000      1.000000      1.000000   \n",
       "max      306.000000   2120.000000     23.000000     51.000000     51.000000   \n",
       "\n",
       "           ...               c_92          c_93          c_94          c_95  \\\n",
       "count      ...       13418.000000  13418.000000  13418.000000  13418.000000   \n",
       "mean       ...           0.064913      0.083992      0.093755      0.070502   \n",
       "std        ...           0.309890      0.377730      0.388404      0.312148   \n",
       "min        ...           0.000000      0.000000      0.000000      0.000000   \n",
       "25%        ...           0.000000      0.000000      0.000000      0.000000   \n",
       "50%        ...           0.000000      0.000000      0.000000      0.000000   \n",
       "75%        ...           0.000000      0.000000      0.000000      0.000000   \n",
       "max        ...           7.000000      9.000000     10.000000      9.000000   \n",
       "\n",
       "               c_96          c_97          c_98          c_99         c_100  \\\n",
       "count  13418.000000  13418.000000  13418.000000  13418.000000  13418.000000   \n",
       "mean       0.082427      0.233790      0.082874      0.076837      0.073558   \n",
       "std        0.503164     15.553234      0.356777      0.455338      0.337954   \n",
       "min        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "25%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "50%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "75%        0.000000      0.000000      0.000000      0.000000      0.000000   \n",
       "max       23.000000   1801.000000      9.000000     16.000000      7.000000   \n",
       "\n",
       "            c_other  \n",
       "count  13418.000000  \n",
       "mean      57.554777  \n",
       "std      110.916584  \n",
       "min        0.000000  \n",
       "25%       14.000000  \n",
       "50%       38.000000  \n",
       "75%       75.000000  \n",
       "max     9664.000000  \n",
       "\n",
       "[8 rows x 101 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 13418 entries, 0 to 13417\n",
      "Columns: 101 entries, c_1 to c_other\n",
      "dtypes: int64(101)\n",
      "memory usage: 10.3 MB\n"
     ]
    }
   ],
   "source": [
    "train.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "the shape of train: (13418, 101)\n"
     ]
    }
   ],
   "source": [
    "# 原始输入的特征维数和样本数目\n",
    "print('the shape of train: {}'.format(train.shape))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 没有标签值的K-means聚类，只能采用内部评价法\n",
    "def K_cluster_analysis(K,train_data):\n",
    "    start = time.time()\n",
    "    \n",
    "    print(\"K-means begin with clusters: {}\".format(K));\n",
    "    \n",
    "    #K-means,在数据集上训练\n",
    "    mb_kmeans = MiniBatchKMeans(n_clusters = K)\n",
    "    mb_kmeans.fit(train_data) \n",
    " \n",
    "    \n",
    "    # K值的评估标准\n",
    "    #常见的方法有轮廓系数Silhouette Coefficient和Calinski-Harabasz Index\n",
    "    #这两个分数值越大则聚类效果越好\n",
    "    CH_score = metrics.calinski_harabaz_score(train_data,mb_kmeans.predict(train_data))\n",
    "    Sc_score = metrics.silhouette_score(train_data,mb_kmeans.predict(train_data))   \n",
    "    \n",
    "     \n",
    "    end = time.time()\n",
    "    print(\"本次聚类耗时{}秒\".format(int(end-start)))\n",
    "    print(\"1.CH_score: {}\".format(CH_score))\n",
    "    print(\"2.Sc_score: {}\".format(Sc_score))\n",
    "    print(\"===================================================\")\n",
    "   \n",
    "    \n",
    "    return CH_score,Sc_score\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "K-means begin with clusters: 4\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 5950.9437877917835\n",
      "2.Sc_score: 0.5375929982438549\n",
      "===================================================\n",
      "K-means begin with clusters: 6\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 1645.5747666338764\n",
      "2.Sc_score: 0.4810035343574725\n",
      "===================================================\n",
      "K-means begin with clusters: 8\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 1254.9810101383985\n",
      "2.Sc_score: 0.42586148098747856\n",
      "===================================================\n",
      "K-means begin with clusters: 10\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 938.3784773094842\n",
      "2.Sc_score: 0.3230967816962167\n",
      "===================================================\n",
      "K-means begin with clusters: 20\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 498.69601259224976\n",
      "2.Sc_score: 0.22844796878482865\n",
      "===================================================\n",
      "K-means begin with clusters: 30\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 551.3109592561873\n",
      "2.Sc_score: 0.21193737125239334\n",
      "===================================================\n",
      "K-means begin with clusters: 40\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 895.2934726192689\n",
      "2.Sc_score: 0.13801972998432852\n",
      "===================================================\n",
      "K-means begin with clusters: 50\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 209.20361417884703\n",
      "2.Sc_score: 0.12138491719757752\n",
      "===================================================\n",
      "K-means begin with clusters: 60\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 205.9659973693352\n",
      "2.Sc_score: 0.10224425812103974\n",
      "===================================================\n",
      "K-means begin with clusters: 70\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 154.7137902671166\n",
      "2.Sc_score: 0.0813491066483999\n",
      "===================================================\n",
      "K-means begin with clusters: 80\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 125.93252009754518\n",
      "2.Sc_score: 0.11718038786980797\n",
      "===================================================\n",
      "K-means begin with clusters: 90\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 118.110613582599\n",
      "2.Sc_score: 0.07942399642998837\n",
      "===================================================\n",
      "K-means begin with clusters: 100\n",
      "本次聚类耗时8秒\n",
      "1.CH_score: 106.20116186726588\n",
      "2.Sc_score: 0.06093021767628729\n",
      "===================================================\n"
     ]
    }
   ],
   "source": [
    "# 设置超参数（聚类数目K）搜索范围\n",
    "Ks = [4,6,8,10,20,30,40,50,60,70,80,90,100]\n",
    "CH_scores = []\n",
    "Sc_scores = []\n",
    "for K in Ks:\n",
    "    ch,sc = K_cluster_analysis(K, train)\n",
    "    CH_scores.append(ch)\n",
    "    Sc_scores.append(sc)   "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0xb15ef98>]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAH85JREFUeJzt3X+clWWd//HXh98OEwwiog4wozalrInoJKRslraAVotb9FiSki1W9CGWVqZY28NWxbV2Vysrk8JCI9D8keiaRogl25cfQ6ipUJAKTCDMiuAPFGTm8/3jug6cYc7MHGDm3GfO/X4+Hudxzn1d1zn35+YM9+fc133d123ujoiIpE+3pAMQEZFkKAGIiKSUEoCISEopAYiIpJQSgIhISikBiIiklBKAiEhKKQGIiKSUEoCISEr1SDqAthxxxBFeXV2ddBgiIl3KypUr/8/dB7XXrqgTQHV1NXV1dUmHISLSpZjZ+nzaqQtIRCSllABERFJKCUBEJKXySgBmVmFm95rZGjNbbWYfMLPDzWyhma2NzwNiWzOz75nZOjN7xsxOzfqcKbH9WjOb0lkbJSIi7cv3COC7wKPufgIwAlgNzAAWuXsNsCguA5wL1MTHNOA2ADM7HLgWGAWcDlybSRoiIlJ47SYAM+sHfBCYDeDuu919OzABmBObzQHOj68nAHd6sBSoMLOjgXHAQnff5u6vAguB8R26NRlz50J1NXTrFp7nzu2U1YiIdGX5HAEcBzQAPzWzVWb2EzPrCwx2980A8fnI2L4S2Jj1/vpY1lp5x5o7F6ZNg/XrwT08T5umJCAisp98EkAP4FTgNncfCbzJvu6eXCxHmbdR3vzNZtPMrM7M6hoaGvIIbz9f/zrs3Nm8bOfOUC4iInvlkwDqgXp3XxaX7yUkhC2xa4f4vDWr/dCs9w8BNrVR3oy7z3L3WnevHTSo3QvZWtqw4cDKRURSqt0E4O4vAxvN7L2x6BzgeWABkBnJMwV4ML5eAFwYRwONBnbELqLHgLFmNiCe/B0byzrWsGEHVi4iklL5TgXxBWCumfUCXgA+R0ge95jZVGAD8KnY9hHgPGAdsDO2xd23mdn1wIrY7jp339YhW5Ft5ky46CJ46619ZWVloVxERPYy9xbd8EWjtrbWD2ouoFmz4OKLw+uqqrDznzy5Y4MTESlSZrbS3Wvba1eaVwJPiT1TN9wAL72knb+ISA6lmQB69YL3vx8O5iSyiEhKFPV00AfNDJYvTzoKEZGiVppHACIi0q7STQCf+ARceWXSUYiIFK3S7AICePFF2LMn6ShERIpW6R4BlJfDm28mHYWISNEq3QTQt68SgIhIG0o7AbzxRtJRiIgUrdI9B3DaadCvX9JRiIgUrdJNAF/7WtIRiIgUtdLtAhIRkTaVbgK49dYwBXRTU9KRiIgUpdJNAG+9BRs3trw7mIiIAKWcAPr2Dc8aCioikpMSgIhISpVuAigvD89KACIiOZVuAqiuhk9+MtwOUkREWijd6wBqa+Hee5OOQkSkaJXuEYCIiLSpdBPAX/8KAwfCPfckHYmISFEq3QTQuzds2wY7diQdiYhIUSrdBKBhoCIibVICEBFJqdJNAL16QY8eSgAiIq3IKwGY2Utm9icze8rM6mLZ4Wa20MzWxucBsdzM7Htmts7MnjGzU7M+Z0psv9bMpnTOJmWZOhVGjuz01YiIdEUHch3Ah939/7KWZwCL3P0mM5sRl68GzgVq4mMUcBswyswOB64FagEHVprZAnd/tQO2I7cf/ajTPlpEpKs7lC6gCcCc+HoOcH5W+Z0eLAUqzOxoYByw0N23xZ3+QmD8Iaw/P5oOWkQkp3wTgAO/MbOVZjYtlg12980A8fnIWF4JbMx6b30sa628GTObZmZ1ZlbX0NCQ/5bkMmoUTJhwaJ8hIlKi8u0COtPdN5nZkcBCM1vTRlvLUeZtlDcvcJ8FzAKo
ra1tUX9AevbUSWARkVbkdQTg7pvi81bgAeB0YEvs2iE+b43N64GhWW8fAmxqo7zzlJcrAYiItKLdBGBmfc3sXZnXwFjgWWABkBnJMwV4ML5eAFwYRwONBnbELqLHgLFmNiCOGBobyzpP375KACIircinC2gw8ICZZdr/wt0fNbMVwD1mNhXYAHwqtn8EOA9YB+wEPgfg7tvM7HpgRWx3nbtv67AtyUUJQESkVe0mAHd/ARiRo/wV4Jwc5Q5Mb+Wz7gDuOPAwD9J558HxxxdsdSIiXUnp3g8AYNKkpCMQESlapTsVBEBjY5gN1A9tMJGISCkq7QRw881QUaHzACIiOZR2AtCMoCIirVICEBFJKSUAEZGUKu0EUF4enpUARERaKO0E8J73wLXXwtFHJx2JiEjRKe3rAI47Dr75zaSjEBEpSqV9BNDYCJs2wRtvJB2JiEjRKe0EsGkTVFbCvHlJRyIiUnRKOwHoJLCISKtKOwFoGKiISKtKOwH06gU9eugcgIhIDqWdAED3BBARaUVpDwMFuPFGOPHEpKMQESk6pZ8ALr006QhERIpS6XcBbdwIL76YdBQiIkWn9I8ALrggnAhevDjpSEREikrpHwHoJLCISE5KACIiKVX6CaC8XAlARCSH0k8AOgIQEcmp9E8Cf+YzMGZM0lGIiBSd0k8AZ5wRHiIi0kzeXUBm1t3MVpnZw3H5WDNbZmZrzexuM+sVy3vH5XWxvjrrM66J5X82s3EdvTE5bd0Kf/gD7NlTkNWJiHQVB3IO4HJgddbyt4Bb3L0GeBWYGsunAq+6+7uBW2I7zGw4MAn4O2A88EMz635o4efhvvvgzDPhlVc6fVUiIl1JXgnAzIYAHwV+EpcNOBu4NzaZA5wfX0+Iy8T6c2L7CcB8d9/l7i8C64DTO2Ij2qQpoUVEcsr3COA7wFVAU1weCGx390y/Sj1QGV9XAhsBYv2O2H5veY737GVm08yszszqGhoaDmBTWpFJAJoSWkSkmXYTgJl9DNjq7iuzi3M09Xbq2nrPvgL3We5e6+61gwYNai+89ukIQEQkp3xGAZ0J/KOZnQf0AfoRjggqzKxH/JU/BNgU29cDQ4F6M+sB9Ae2ZZVnZL+n8ygBiIjk1O4RgLtf4+5D3L2acBL3cXefDCwGJsZmU4AH4+sFcZlY/7i7eyyfFEcJHQvUAMs7bEtaM3w4/PKX8L73dfqqRES6kkO5DuBqYL6Z3QCsAmbH8tnAXWa2jvDLfxKAuz9nZvcAzwN7gOnu3ngI68/PwIEwcWL77UREUsbCj/PiVFtb63V1dYf2Ibt3wxNPwHveA9XVHRGWiEhRM7OV7l7bXrvSnwvozTdh3Dh44IGkIxERKSqlnwB0ElhEJKfSTwC9ekHPnkoAIiL7Kf0EAJoSWkQkh/QkAF0JLCLSTOlPBw0wdy50xFXFIiIlJB0J4Kyzko5ARKTopKMLaMkS+O1vk45CRKSopOMIYObMcD+Aj3wk6UhERIpGOo4ANApIRKQFJQARkZRSAhARSSklABGRlEpHArjsMnjyyaSjEBEpKukYBVRVFR4iIrJXOo4A/vIXmDVL00GIiGRJRwJYuhQuvhi2bEk6EhGRopGOBKB7AoiItJCuBKAuIBGRvdKVAHQEICKylxKAiEhKpWMY6PDhsHo1DB2adCQiIkUjHQmgTx844YSkoxARKSrp6AJ65x245RZYtizpSEREika7CcDM+pjZcjN72syeM7N/j+XHmtkyM1trZnebWa9Y3jsur4v11VmfdU0s/7OZjeusjcrpy1+GhQsLukoRkWKWzxHALuBsdx8BnAKMN7PRwLeAW9y9BngVmBrbTwVedfd3A7fEdpjZcGAS8HfAeOCHZta9IzemVT17Qq9eOgksIpKl3QTgQWYAfc/4cOBs4N5YPgc4P76eEJeJ9eeYmcXy+e6+y91fBNYBp3fIVuRDM4KKiDST1zkAM+tuZk8BW4GFwF+B7e6+JzapByrj60pgI0Cs
3wEMzC7P8Z7OpwQgItJMXgnA3Rvd/RRgCOFX+4m5msVna6WutfJmzGyamdWZWV1DQ0M+4eWnb19dCSwikuWAhoG6+3YzewIYDVSYWY/4K38IsCk2qweGAvVm1gPoD2zLKs/Ifk/2OmYBswBqa2tbJIiD9sQTUFbWYR8nItLV5TMKaJCZVcTXhwEfAVYDi4GJsdkU4MH4ekFcJtY/7u4eyyfFUULHAjXA8o7akHYddRT061ew1YmIFLt8jgCOBubEETvdgHvc/WEzex6Yb2Y3AKuA2bH9bOAuM1tH+OU/CcDdnzOze4DngT3AdHdv7NjNacP8+fDKKzB9esFWKSJSzCz8OC9OtbW1XldX1zEfNnFimA7iuec65vNERIqUma1099r22qXjSmDQSWARkf2kKwFoGKiIyF5KACIiKZWeBFBeDm+/DY2FO+8sIlLM0pMArr4a3noLuqVnk0VE2pKO+wFAuCeAiIjslZ6fw6tWwRe/CFu3Jh2JiEhRSE8CePFFuPVWePnlpCMRESkK6UkAujG8iEgz6UsAuhhMRARIYwLQEYCICJC2BNCtG+zalXQkIiJFIT3DQGtqYM8esFz3pRERSZ/0JADt+EVEmklPF1BTE0ydCvffn3QkIiJFIT0JoFs3uOsuWF64m5CJiBSz9CQA0IygIiJZ0pUAysuVAEREonQlAB0BiIjsla4EMHgw9OyZdBQiIkUhPcNAAX73u6QjEBEpGuk6AhARkb3SlQC+8x249NKkoxARKQrp6gJ66ilYvDjpKEREikK6jgA0CkhEZK92E4CZDTWzxWa22syeM7PLY/nhZrbQzNbG5wGx3Mzse2a2zsyeMbNTsz5rSmy/1symdN5mtUIJQERkr3yOAPYAX3H3E4HRwHQzGw7MABa5ew2wKC4DnAvUxMc04DYICQO4FhgFnA5cm0kaBdO3L7z9NjQ2FnS1IiLFqN0E4O6b3f2P8fXrwGqgEpgAzInN5gDnx9cTgDs9WApUmNnRwDhgobtvc/dXgYXA+A7dmvYMHgzHHx+SgIhIyh3QOQAzqwZGAsuAwe6+GUKSAI6MzSqBjVlvq49lrZUXziWXwLp1++4OJiKSYnknADMrB+4DrnD319pqmqPM2yjffz3TzKzOzOoaGhryDU9ERA5QXgnAzHoSdv5z3T0zof6W2LVDfN4ay+uBoVlvHwJsaqO8GXef5e617l47aNCgA9mW9v3hD3D22bB2bcd+rohIF5TPKCADZgOr3f3mrKoFQGYkzxTgwazyC+NooNHAjthF9Bgw1swGxJO/Y2NZ4bz+ergOQEcWIiJ5XQh2JvBZ4E9m9lQs+xpwE3CPmU0FNgCfinWPAOcB64CdwOcA3H2bmV0PrIjtrnP3bR2yFfnK9P1rKKiISPsJwN2XkLv/HuCcHO0dmN7KZ90B3HEgAXYoJQARkb3SdyUwwBtvJBuHiEgRSFcC6N8fTj453BlMRCTl0jUZ3ODB8PTTSUchIlIU0nUEICIie6UrAcydC336gBlUV4dlEZGUSk8CmDsXpk2DXbvC8vr1YVlJQERSKj0J4Otfh507m5ft3BnKRURSKD0JYMOGAysXESlx6UkAw4YdWLmISIlLTwKYORPKypqXlZWFchGRFEpPApg8GWbNgqqqMAqoqiosT56cdGQiIolITwKAsLN/6SVoaoIVK8LNYfY/MSwikhLpSgDZ1qyBb34TZs9OOhIRkUSkNwH8/d/DmDHw7W/D7t1JRyMiUnDpTQAQrgGor4e77ko6EhGRgkt3Ahg3Dk47DW66CfbsSToaEZGCSncCMAtHASefDDt2JB2NiEhBpTsBAPzTP8EnPhGOBLp10yRxIpIa6bofQC6ZSeIyw0Ezk8SBrhEQkZKmIwBNEiciKaUEoEniRCSllAA0SZyIpJQSQK5J4nr00CRxIlLylAD2nySuvBw++lGdABaRkqdRQBB29pkdvntIBCIiJa7dIwAzu8PMtprZs1llh5vZQjNbG58HxHIz
s++Z2Toze8bMTs16z5TYfq2ZTemczekAmZ3/woXhISJSovLpAvoZMH6/shnAInevARbFZYBzgZr4mAbcBiFhANcCo4DTgWszSaMoNTXBV74CF10Eb72VdDQiIp2i3QTg7r8Htu1XPAGYE1/PAc7PKr/Tg6VAhZkdDYwDFrr7Nnd/FVhIy6RSPLp1g1tvDReFfetbSUcjItIpDvYk8GB33wwQn4+M5ZXAxqx29bGstfLiddZZMGlSGA1UWalpIkSk5HT0KKBcZ0+9jfKWH2A2zczqzKyuoaGhQ4M7YGecEWYJ3bQpnBzOTBOhJFAYc+eGpKvkK9IpDjYBbIldO8TnrbG8Hhia1W4IsKmN8hbcfZa717p77aBBgw4yvA7y3//dskzTRBRGZo6m9euVfEU6ycEmgAVAZiTPFODBrPIL42ig0cCO2EX0GDDWzAbEk79jY1lx0zQRydEcTSKdLp9hoPOA/we818zqzWwqcBPwD2a2FviHuAzwCPACsA74MXApgLtvA64HVsTHdbGsuLU2HYQ7fPjD8Nhj4bV0rOXLwy/+XJR8RTqMeRHvwGpra72uri65APafKhrgsMPCPQSeeCKcG/jSl+DmmxMLsSTs3g2//jUccwy8//3w5z/D+94H77zTsm1VFbz0UsFDFOlKzGylu9e2105TQbRl/2kiqqrgxz8OieGFF2D2bPjMZ0LbtWtD27ffTjbmrsIdVqyAL3wh7PjPPx9++MNQ9973wk9/2nKOprIymDEDVq4sfLwiJUgJoD2TJ4dfnE1N4TkzZUTv3vD5z8Op8WLn+fPh4ovh2GPh29+G115LKuKu4dxz4fTTQ0I95xx4+OGQQDNyJd9Zs8LO/wMfCEddTU3JxS9SAtQF1FHcYfHicIP5hQuhf/9wNfE3vpF0ZMl74w24/3548MGQKHv2hDvuCDvwiROhoiL/z3rlFZg6NXzWeefBz34GSY8WEyky6gIqNDM4+2z4zW+grg7GjoUtW/bV19cnF1sSGhtDIvzsZ2HwYJgyBZ56al///ec/D//6rwe28wcYOBAeeAC+/31YtAhGjAj/3iJywJQAOsNpp8E994TpJACWLAldGBdcAE8/nWxsnS1z4vZ//zckwYceCudJliyBdeugpubQ12EG06fDsmXhfMHQoe2/R0RaUALoTJmZRd/97tAd9NBDcMop4X4DTz5ZOkNIt26F7343JL6rrgplY8aEbp+XX4bbb4czz+z4abZHjAjdboMHhyu2r7ii9eGjItKCEkAhHHVUODG8YUOYW2jFijCUtKuPGFqwAD7+8TCK54orwg5+xIhQ161b2MY+fQoTy/PPh/MKp5wSEo+ItEsJoJAGDICvfS38Sv31r8M1BY2NYQjkXXflHveepFxz8Sxfvu/I5Ve/glWr4Mor4dlnQ1/8v/xLMrGefHKIpaYGPvlJuPRSTeUt0g6NAkpafX0YEvnss+HK4yuvDKNc9h8DX2i5LoIzCzv/lSvD8Nft2+Fd74Lu3ZOLc3+7d8O//Rv853+GxPrAA0lHJFJw+Y4CUgIoBu7wyCPwH/8RTp4ecQT8/vfwxz+GuW82bAjJYebMg7tX8e7d4bqE116DHTvC5w8dCm++CXPmhLLs+kmT4LLLcvenDxwYRvKUlx/yZneqRx8N21lbG84PdO+uW31KaigBdFVLloSd8gc/CJdc0vwXeO/eYcf8z/8cpkyAcJ3B9u3Nd+Af+xh8+cvhHENFBeza1Xwd11wDN94YxtQfcUQo69EjXLvQvz9cfXVYd66/DbOudwHWZZdBQ0O4kKx//6SjEel0+SYA3RS+2IwZEx7V1S1nw9y1K0xR/be/wbx5oez73w875X79ws6tX7/QZw8hYVx+eSjLPPr3hxNPDPUDBoRROv37h7bZv5BvvDH3EUBrE+QVK/dwtPOjH4XzF/PmwejRSUclUhR0BFCsunVr/Rf4+vX7xr67d07XRq5zAGVl4Vf0wXRDJW3pUvj0p8M5lxtugK9+
dV+iFCkxuhK4q2vtl/awYc0vfOqsfu3W5uLpijt/CL/6V60KQ1Ovv17TSougBFC8Zs7MPRvmzJmFi6G1ifC6qooKuPvukAiqq8PR01NPJR2VSGKUAIpVqf0CLxZm+6ajuPtuGDkyXL28e3eycYkkQOcAJL127gyjpW6/PUxNPW8eHHdc0lGJHDKdAxBpT1lZGB30y1+Gu5CNHKlpJCRVlABEJk4M5wJOOincqwByT4MhUmKUAEQg7OSffDJMbjd3brhfwfr14UTx+vVhSGwhk4ASkBSAzgGI7G/YMNi4sWV5ZWW4jmDLlnBRmdm+YbhmMGpUmCrj5ZfhT3/aV5551NaGi/E2bw73kM5+r1nogiorgx/8IMwJlT1b7GGHhUEAmXtQd7a5cztmGhJJRL7nAHD3on2cdtppLlJwZu7ht3/zh1mof+ih3PWPPx7qf/GL3PUrVoT622/PXb9mTaivqMhdX1kZ6m+5xf34491HjnQ/6yz3j3/c/YIL3N98M9QvXux+663uc+a433+/+29/6758uXtTU6jfvXvf61x+/nP3srLm6y4rC+WF8POfu1dVhX/vqqrCrbeEAHWexz5WRwAi+6uuzj0NxtCh4Rfx9u3h7maZ/zuZ3eQJJ4RpNRoa4C9/ab77hvAL/13vClN5rFnT/L0AZ5wBffu2fRV4U1M4UX3//fvmf8rMAbVmTTiHcdll4SgiW48eYairWejeuvPO5lOEHHNMmEAPwlHMtm0t119VFa4HWboUXn893Ouhd+/w3K9f+HeDMMlgz57hcaAXKhbDFehJH/10wPo1GZzIwUp6J9RaAsrsgNuTPftr5rFzJ4wfH+p/9atw74bs+l69YP78UN/aTjuTgD70Ifjd75rXjRwZZq+F0NW1cmVon0kQZ50V1gvhauzNm0N55jF6NMyY0fq2DxoU7jwH4e5zjY0hUWYeJ50U4nIPNwbKruvWDYYPDzG+8w48/HDL+pqacOe+OXPg4oubT6DYp0+Yg+vSS0P5Cy+E2WV79AjP3bvD4YeHv5E9e0ICzK7r3j3/aUc66G9PXUAihyLJboiku2CqqnJ3QVVVhfrVq92XLAldS//zP+733ef+6KP73j9njvvMme7f+Ib7VVe5f/GLodsq46KL3MeNC91Xo0a5jxjhPn16qGut+w32vf+ww1rWXXJJqNuzJ/d7v/rVUL99e+76664L9ZWVuesHDAj1zz+fu/7HPw71y5blrp83L9QvXuzet697v37hM484wv2oo8K/pbv7kUe2/W+fJ/LsAir4bKBmNh74LtAd+Im731ToGETaNXlycic9M+tNqhti5szcv0Iz05CccELb77/wwrbrZ81qvW7YsPZnod2yJewWm5r2PXr3DnXduoX3NzU1b1NREerLy8OQ3+z3NjWFE/wAmzbljmv79vB8zDHhgsHGxvBrv7ExPMaMCfVDh4ajhf3rTzop1FdWhqnWs+saG+HII0N9Q0Pu9XfW3FX5ZImOehB2+n8FjgN6AU8Dw1trryMAkYQkdQRU7Ec/XWT95HkEUOjrAE4H1rn7C+6+G5gPTChwDCLSnqQmAkx6DqykJ2Es8PoLnQAqgewB1vWxTEQkSHIW2qQTUIHXX+hzALmGFzQbhmRm04BpAMO62t2nRKTrS/L8T4HXX+gjgHog624mDAGanXVx91nuXuvutYMGDSpocCIiaVLoBLACqDGzY82sFzAJWFDgGEREhAJ3Abn7HjO7DHiMMCLoDnd/rpAxiIhIUPDrANz9EeCRQq9XRESa03TQIiIpVdRzAZlZA5C5LPAI4P8SDCdJad52SPf2a9vT61C2v8rd2x1FU9QJIJuZ1Xk+kxuVoDRvO6R7+7Xt6dx2KMz2qwtIRCSllABERFKqKyWANqYQLHlp3nZI9/Zr29Or07e/y5wDEBGRjtWVjgBERKQDFX0CMLPxZvZnM1tnZjOSjqezmdlQM1tsZqvN7DkzuzyWH25mC81sbXwekHSsncXMupvZ
KjN7OC4fa2bL4rbfHacRKUlmVmFm95rZmvg38IG0fPdm9qX4N/+smc0zsz6l/N2b2R1mttXMns0qy/ldW/C9uB98xsxO7YgYijoBmFl34AfAucBw4NNmNjzZqDrdHuAr7n4iMBqYHrd5BrDI3WuARXG5VF0OrM5a/hZwS9z2V4GpiURVGN8FHnX3E4ARhH+Hkv/uzawS+CJQ6+4nEaaKmURpf/c/A8bvV9bad30uUBMf04DbOiKAok4ApPAGMu6+2d3/GF+/TtgBVBK2e05sNgc4P5kIO5eZDQE+CvwkLhtwNnBvbFLK294P+CAwG8Ddd7v7dlLy3ROmpjnMzHoAZcBmSvi7d/ffA9v2K27tu54A3Blv+LUUqDCzow81hmJPAKm+gYyZVQMjgWXAYHffDCFJAEcmF1mn+g5wFdAUlwcC2919T1wu5b+B44AG4KexC+wnZtaXFHz37v434L+ADYQd/w5gJen57jNa+647ZV9Y7Amg3RvIlCozKwfuA65w99eSjqcQzOxjwFZ3X5ldnKNpqf4N9ABOBW5z95HAm5Rgd08usa97AnAscAzQl9Dtsb9S/e7b0yn/D4o9AbR7A5lSZGY9CTv/ue5+fyzekjnki89bk4qvE50J/KOZvUTo7jubcERQEbsFoLT/BuqBendfFpfvJSSENHz3HwFedPcGd38HuB84g/R89xmtfdedsi8s9gSQuhvIxD7v2cBqd785q2oBMCW+ngI8WOjYOpu7X+PuQ9y9mvBdP+7uk4HFwMTYrCS3HcDdXwY2mtl7Y9E5wPOk4LsndP2MNrOy+H8gs+2p+O6ztPZdLwAujKOBRgM7Ml1Fh8Tdi/oBnAf8Bfgr8PWk4ynA9o4hHNo9AzwVH+cR+sIXAWvj8+FJx9rJ/w4fAh6Or48DlgPrgF8CvZOOrxO3+xSgLn7/vwIGpOW7B/4dWAM8C9wF9C7l7x6YRzjf8Q7hF/7U1r5rQhfQD+J+8E+E0VKHHIOuBBYRSali7wISEZFOogQgIpJSSgAiIimlBCAiklJKACIiKaUEICKSUkoAIiIppQQgIpJS/x9ukh0RsKDrHQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xb101630>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the CH score for every candidate value in Ks; the best\n",
    "# model/parameter is the one with the highest score.\n",
    "# NOTE(review): Ks is defined in an earlier cell - presumably the candidate\n",
    "# cluster counts, not PCA dimensions as the old comment said; confirm.\n",
    "plt.plot(Ks, CH_scores, 'ro--')\n",
    "plt.xlabel('K')\n",
    "plt.ylabel('CH score')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[<matplotlib.lines.Line2D at 0xb1bec50>]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAHXBJREFUeJzt3Xt0VeWZx/HvkwBCFEUwihCSSAtaVBSNwljHsgQsqIO1eG0ctctKnaXDOKW2VKzTalOxOl7LVCmtt8a71VKtFWWw6IygQUVlQEHkEm8ElYpGLpFn/nhPTAgn5gTOOTvZ+/dZ66yT/Z7NybOzwy/7vPvd7zZ3R0RE4qUg6gJERCT7FO4iIjGkcBcRiSGFu4hIDCncRURiSOEuIhJDCncRkRhSuIuIxJDCXUQkhrpE9Y332msvLy8vj+rbi4h0SgsXLlzn7sVtrRdZuJeXl1NTUxPVtxcR6ZTMbFUm66lbRkQkhhTuIiIxpHAXEYkhhbuISAwp3EVEYqhThXt1NZSXQ0FBeK6ujroiEZGOKbKhkO1VXQ0TJ0J9fVhetSosA1RWRleXiEhH1GmO3KdObQr2RvX1oV1ERLbVacJ99er2tYuIJFmnCffS0va1i4gkWacJ96oqKCratq2oKLSLiMi2Ok24V1bCjBlQVhaWzeCWW3QyVUQknU4T7hCCfOVKuO8+cIdBg6KuSESkY+pU4d5o1Khw5P7kk1FXIiLSMXXKcO/TBw47DGbPjroSEZGOqVOGO8CYMTB/PmzYEHUlIiIdT6cN9+OOg4YGePrpqCsREel4Om24H3VUGAqpfncRke112nDfZRc45hiFu4hIOp023CH0uy9dCmvWRF2JiEjHklG4m9lYM3vdzJab2ZQ0r59rZnVm9nLq8b3sl7q9MWPCs47eRUS21Wa4m1khMB0YBwwBzjSzIWlWvc/dD009Zma5zrQOOgj69lW4i4i0lMmR+5HAcndf4e6bgXuBk3JbVmbMYPRoeOop2Lo16mpERDqOTMK9P9C8V7s21dbSBDN7xcweNLMBWakuA8cdB+vWwaJF+fqOIiIdXybhbmnavMXyn4Fydx8KPAXckfaNzCaaWY2Z1dTV1bWv0laMHh2edbWqiEiTTMK9Fmh+JF4CvNN8BXf/wN03pRZ/Cxye7o3cfYa7V7h7RXFx8Y7Uu5199w197+p3FxFpkkm4vwAMMrP9zKwbcAYwq/kKZrZvs8XxwJLsldi2MWPg2Wfhs8/y+V1FRDquNsPd3RuAi4AnCKF9v7svNrMrzGx8arVJZrbYzBYBk4Bzc1VwOmPGwKZN8Mwz+fyuIiIdl7m37D7Pj4qKCq+pqcnKe336KfTuDZMmwTXXZOUtRUQ6JDNb6O4Vba3Xqa9QbbTrrvCVr8CNN0JBAZSXQ3V11FWJiESnS9QFZEN1NSxfDlu2hOVVq2DixPC1bsMnIkkUiyP3qVObgr1RfX1oFxFJoliE++rV7WsXEYm7WIR7aWn72kVE4i4W4V5VFW7c0VxRUWgXEUmiWIR7ZSXMmAH9UzPe9OoVlnUyVUSSKhbhDiHIa2vh4INh2DAFu4gkW2zCvdE3vxmmIvjkk6grERGJTizDfcsWePrpqCsREYlO7ML96KOhRw944omoKxERiU7swr17dxg5UuEuIskWu3CH0DWzbBm89VbUlYiIRCO24Q46eheR5IpluO+/f7g6VeEuIkkVy3A3C0fvc+ZsP6GYiEgSxDLcIYT7hg0wf37UlYiI5F9sw33UKCgsVNeMiCRTbMO9Vy8YPlzhLiLJFNtwh9A1s3AhrFsXdSUiIvkV63A/7jhwhyefjLoSEZH8inW4H3EE7LmnumZEJHliHe6FhTB6NMyeHY7gRUSSItbhDrDHHvDuuyHoy8uhujrqikREci/W4V5d3RTm7rBqFUycqIAXkfiLdbhPnQqffbZtW319aBcRibNYh/vq1e1rFxGJ
i1iHe2lp+9pFROIi1uFeVQVFRdu2FRWFdhGROIt1uFdWwowZ0LdvWC4uDsuVldHWJSKSa7EOdwhBvnRp+HryZAW7iCRD7MMdwlj3ffaBN96IuhIRkfxIRLgDDB6scBeR5Mgo3M1srJm9bmbLzWzKl6x3ipm5mVVkr8TsULiLSJK0Ge5mVghMB8YBQ4AzzWxImvV6ApOABdkuMhsGD4a1a2H9+qgrERHJvUyO3I8Elrv7CnffDNwLnJRmvSuBXwEbs1hf1uy/f3hetizaOkRE8iGTcO8PrGm2XJtq+4KZDQMGuPujX/ZGZjbRzGrMrKaurq7dxe6MwYPD8+uv5/XbiohEIpNwtzRtX0yga2YFwPXA5LbeyN1nuHuFu1cUFxdnXmUWDBwIBQXqdxeRZMgk3GuBAc2WS4B3mi33BA4CnjazlcAIYFZHO6m6yy5hyl+Fu4gkQSbh/gIwyMz2M7NuwBnArMYX3f3v7r6Xu5e7ezkwHxjv7jU5qXgnaMSMiCRFm+Hu7g3ARcATwBLgfndfbGZXmNn4XBeYTY3hrrsyiUjcdclkJXf/C/CXFm2Xt7LuyJ0vKzcGD4ZPPw13ZurXL+pqRERyJzFXqELTiBl1zYhI3CncRURiKFHhPmAAdO+ucBeR+EtUuBcUwKBBCncRib9EhTtoOKSIJEMiw/3NN6GhIepKRERyJ5Hh3tAAK1dGXYmISO4kMtxBE4iJSLwlNtzV7y4icZa4cO/TB/bcU+EuIvGWuHA304gZEYm/xIU7KNxFJP4SG+61tWESMRGROEpkuDfeT3X58mjrEBHJlUSGu0bMiEjcJTLcv/rV8KxwF5G4SmS477orlJQo3EUkvhIZ7qARMyISbwp3EZEYSnS4f/ghrFsXdSUiItmX6HAHHb2LSDwp3BXuIhJDiQ338nLo0kXhLiLxlNhwv//+8HzVVSHoq6sjLUdEJKsSGe7V1TBxYtOt9latCssKeBGJi0SG+9SpUF+/bVt9fWgXEYmDRIb76tXtaxcR6WwSGe6lpe1rFxHpbBIZ7lVVUFS0ffv3v5//WkREciGR4V5ZCTNmQFlZuO1eSQn06gW33qorVkUkHhIZ7hACfuVK2LoV1qyB2bPhvffgtNNgy5aoqxMR2TmJDfeWjjgiHM3PnQuXXBJ1NSIiOyejcDezsWb2upktN7MpaV6/wMxeNbOXzexZMxuS/VJz7+yz4eKL4cYb4Y47oq5GRGTHtRnuZlYITAfGAUOAM9OE993ufrC7Hwr8Crgu65XmyTXXwKhR4eTq889HXY2IyI7J5Mj9SGC5u69w983AvcBJzVdw94+bLe4KePZKzK8uXeC++2DffeHkk+Hdd6OuSESk/TIJ9/7AmmbLtam2bZjZhWb2JuHIfVJ2yotGnz7wpz/B+vUwYQJs2hR1RSIi7ZNJuFuatu2OzN19urt/BfgxcFnaNzKbaGY1ZlZTV1fXvkrzbOhQuP12eO45uOgi8E77WUREkiiTcK8FBjRbLgHe+ZL17wW+le4Fd5/h7hXuXlFcXJx5lRE59VS49FKYORNuuSXqakREMpdJuL8ADDKz/cysG3AGMKv5CmY2qNniCcCy7JUYrSuugBNOgEmTYN68qKsREclMm+Hu7g3ARcATwBLgfndfbGZXmNn41GoXmdliM3sZ+AFwTs4qzrPCwjAV8MCBcMopmlxMRDoH84g6kysqKrympiaS770jli6F4cPhq1+FZ5+FHj2irkhEksjMFrp7RVvr6QrVDB1wQDiCf+klOP98nWAVkY5N4d4OJ54IV14ZQv66TnuZlogkgcK9nS69NPS9/+hHYbIxEZGOSOHeTmZw221w4IFwxhnw5ptRVyQisj2F+w7YbTd45JEQ9CedBJ98EnVFIiLbUrjvoIEDwxw0S5bAOeeEeeFFRDoKhftOGD0arr0W/vjHcOs+EZGOQuG+ky6+GM46Cy6/HGbNant9EZF8ULjvJLNwB6eKihDyS5ZEXZGIiMI9K3r0CF0z
PXqEE6zr10ddkYgkncI9SwYMgIceCjfd/s534PPPo65IRJJM4Z5FRx8NN98Mjz8Ol6Wd0V5EJD+6RF1A3Hz/+/DiizBtGhx6KJx+etQViUgS6cg9B26+Gb7+dfjud2HRoqirEZEkUrjnQLdu8OCD0Lt3OMG6bl3UFYlI0ijcc6RvX3j4YXjvPTjtNNiyJeqKRCRJFO45dMQRYQz83Lnwwx9GXY2IJIlOqObY2WeHG3zccAMMGwbnnht1RSKSBDpyz4NrroFRo+CCC+D556OuRkSSQOGeB126hBkk+/WDk0+Gd9+NuiIRiTuFe5706RPmgF+/HiZMgE2boq5IROJM4Z5HQ4fC7bfDc8/B2LFQVgYFBVBeHu7LKiKSLTqhmmenngrjx287PfCqVTBxYvi6sjKaukQkXnTkHoGXX96+rb4epk7Nfy0iEk8K9wisWZO+fdWq0D2zfDm457cmEYkXdctEoLQ0BHlLZuGGHxBOwA4f3vQ48kjYc8/81ikinZfCPQJVVaGPvb6+qa2oCG65BQ45BObPhwULwuPxx5uO4vfff9vAHzoUunaNZhtEpGMzj+jzf0VFhdfU1ETyvTuC6urQx756dTiSr6pKfzL144/hhReawn7+fFi7NrzWvTscfngI+hEjwvOAAeETgIjEk5ktdPeKNtdTuHcu7qFLp3nYv/hi07j5vn23DfuKCujZc/v3yfSPi4h0LAr3BNm8GV55pSnsFyyAZcvCawUFcOCB23bnvPxymAqhZbfQjBkKeJGOTuGecB98EOaxaTzCX7AAPvoovGaWfjROWVm4B6yIdFyZhrtOqMZUnz4wblx4QAjzZctCyJ99dvp/s3p1/uoTkdzSOPeEMIPBg+Gf/zkcoadTWprfmkQkdzIKdzMba2avm9lyM5uS5vUfmNn/mdkrZjbHzFqJD+kIqqpCH3tLF1yQ/1pEJDfaDHczKwSmA+OAIcCZZjakxWovARXuPhR4EPhVtguV7KmsDCdPy8rCEX2/ftCrV5h3fuHCqKsTkWzI5Mj9SGC5u69w983AvcBJzVdw97nu3jj2Yj5Qkt0yJdsqK8PJ061b4e23Q6jvvnu4qchzz0VdnYjsrEzCvT/QfDaU2lRba84DHk/3gplNNLMaM6upq6vLvErJuYED4W9/g+JiOO44mDcv6opEZGdkEu7prndMO37SzM4CKoBr0r3u7jPcvcLdK4qLizOvUvKitDQEfElJmG/+qaeirkhEdlQm4V4LDGi2XAK803IlMxsNTAXGu7vuM9RJ9esXAn7QIDjxRHjssagrEpEdkUm4vwAMMrP9zKwbcAYwq/kKZjYMuJUQ7GuzX6bk0957w9y5cNBB4Z6vDz8cdUUi0l5thru7NwAXAU8AS4D73X2xmV1hZuNTq10D7AY8YGYvm9msVt5OOonevWHOnDA3zamnwj33RF2RiLRHRleouvtfgL+0aLu82dejs1yXdAB77AFPPBG6Zyorw+Rk554bdVUikgldoSpfqmfPMKf86NHw3e/CrbdGXZGIZELhLm0qKgo39D7xxHAV6403Rl2RiLRF4S4Z6d4dHnoIvv1tuPhimDYt6opE5Mso3CVj3brBfffBmWfCT34CP/uZbuQt0lFpyl9ply5d4K67wpH8z38OGzfCVVfp1n4iHY3CXdqtsBBmzgwBf/XV8NlncMMNCniRjkThLjukoACmT4dddgnBvmkT/Nd/hXYRiZ7CXXaYGVx3HfToEbpmNm6E3/0uHNmLSLQU7rJTzMLNP3r0gMsvD0fwd94JXbtGXZlIsincZaeZwU9/Gvrgf/SjEPD33BO6bEQkGuohlay55BK46aYw0di3vx26aUQkGgp3yap//dcwRcHjj4crWj/9NOqKRJJJ4S5ZN3Ei3H57mDZ43Dj4+OOoKxJJHoW75MTZZ8Pdd8P//m+4bd/69VFXJJIsCnfJmdNPhwcfhBdfDDfe/uCDqCsSSQ6Fu+TUt74FjzwCixfDyJHw/vtRVySS
DAp3ybnjjw/3Yl2xAr7xDXj77agrEok/hbvkxahR8Ne/wjvvwLBhUFISpiooL4fq6qirE4kfhbvkzT/+I0yeDHV14ejdHVatCqNrFPAi2aVwl7y67bbt2+rrw5WtW7fmvx6RuNL0A5JXq1enb3/nHejTB4YPD48RI8Jz7975rU8kLhTuklelpaErpqU+fWDCBJg/H37xi6aj+MGDm8J+xAg4+GBNSiaSCYW75FVVVehjr69vaisqCjfdrqwMy598AjU1Iejnz4fZs8PdnyDMPnn44U1hP3x4ODkrItsyj+gmmBUVFV5TUxPJ95ZoVVfD1Kmhi6a0NAR+Y7Cn4x7WbQz7BQtg4ULYvDm83r//tmF/+OHhD4ZIHJnZQnevaHM9hbt0Rps2waJFIegbQ3/FivBaYSEccsi23TmDBjXdBrC9f1xEOhKFuyROXV1T2C9YEB4bNoTXeveGI48MR/SPPRb+ODQqKoIZMxTw0jko3CXxPv8cli7d9uj+1VfTr1tSAmvW5Le+JNKnpp2ncBdJo6Ag9OGnM2RImP9m5MgwTcLee+ezsvirrk5/Ml2fmtpH4S6SRnl5+qGYvXrBUUfBM880deUo7LOrtDT9p6OyMli5Mu/ldFqZhruuUJVEqarafiRNURH8+tehL/7DD0M3ztVXh9C580447TTYZx848EC48EJ44AFYuzaa+jujjz4K1y601u21ahUsWZLfmpJAR+6SOO3p921oCPPRP/10eDzzTBiHDyHsG4/sjzlGR/Ytvf02XH99uO3iJ5+EaxQ++2z79RpHMZ1+Olx2Wfi5SuvULSOSA41hP3duCPtnn00f9t/4BhQXR1hohF5/Ha65Jnzq2boVzjgjzB306qvp+9z/8z/D0fuvfx3uuXvKKSHkhw6Nbhs6skzDHXdv8wGMBV4HlgNT0rx+DPAi0ACcksl7Hn744S7S2W3e7D5/vvu0ae5jx7rvuqt7OGXrfuCB7hde6P7AA+5r1zb9mz/8wb2szN0sPP/hD1FVn13PP+8+YULYru7dw7avWLHtOl+27evWuV92mfvuu4ef38knu7/0Uj63oHMAajyT3G5zBSgE3gQGAt2ARcCQFuuUA0OBOxXukmRfFvYHHeQ+Zox7t25NbeBeVNR5A37rVvcnn3Q/9tiwLb16uU+d6v7++zv+nh9+6P4f/+G+xx7hPcePd6+pyVrJnV6m4d5mt4yZ/QPwM3f/Zmr5J6kj/qvSrHs78Ki7P9jWJwZ1y0gSbNkSpkpo7LOfPTv9UMwBA1qfMbMj+vxzePhhmDYtbN+++8IPfhC6XXbfPTvfY/16uPnm0G//0Udwwglw+eXhYrQky+Zomf5A8/Pctak2EWlD165h+oMpU8KdqFqzZg2MHQvXXRf6piM6FdamTZtg5kz42tfg1FPh44/ht7+Ft96CH/4we8EOYXjqT38ahklWVcFzz4UpJcaNC1/Ll8sk3C1N2w796pnZRDOrMbOaurq6HXkLkU6ttDR9e8+eIeAnTw4nEvv3h3POCSN7OsJNxT/+GK69FvbbD84/P4T4Aw+EIYzf+x7sskvuvvfuu8Oll4aQnzYtzBh61FFw3HHhhLakl0m41wIDmi2XAO/syDdz9xnuXuHuFcVJHUogidbaOPvf/AYWLw4B//vfh9E2jz0GZ50FffuG+87++McwZw5s3Ji/eteuDSNXysrgkkvChV1PPgkvvBBGtRQW5q+Wnj3Dz2DlyjAaZ9GicOvGY4+Fv/0tf3V0Gm11yhPmfF8B7EfTCdUDW1n3dnRCVeRLZTpa5vPPw4nEX/7SfeRI965dwwnGHj3CydrrrnN/7bVwUjPbVqwIo126dw91TpgQRsN0JJ9+6n799e59+4afyzHHuM+Zk5ufR0dCtkbLhPfieOANwqiZqam2K4Dxqa+PIBzhfwp8ACxu6z0V7iLts2GD+6OPuk+a5H7AAU2jbfr1cz/3XPe77952yOWOWLTI/TvfcS8sDH9MzjvPfenS7NSf
K/X17jfdFH4O4H700e6zZ8c35LMa7rl4KNxFds6qVe4zZ7qfdpp7795NYX/YYe5TpoSj2I0bt/936T45zJvnfvzx4d/vtpv75MnutbX53qKd89ln7tOnu5eUhO0YMcL98cfjF/IKd5EEaWgI3Sa/+EXonujSxb8YQz9uXOi+WLw4BHlRUdMfAnAvKAjPe+3lfuWV7h98EPXW7JyNG91vucW9tDRs1xFHuP/5zyHk43ABWabhrukHRGJow4amcfWzZ8Mbb4T2wsIwRr2lPfeE2tp43Z5w8+YwBcIvfxmGapaVwXvvdf4btWhuGRH5wsqVYZTLxInpXzcL88DE0ZYtYUjp+eeHuYFa6mxTDmvKXxH5Qnl5CLeysvSvtzb+Pg66doVzz03/iQXCpGU33xyGVsbpD5zCXSRBWhtnX1UVTT351NofsMJCmDQJDj0U+vSBf/qnMI5+wYJw1N9ZKdxFEqSyMvQxl5WFrpiyss7X57yjWvvDdscdoVvmrrvClArLloUpikeMCFMgjBkDV1wRzmGkm4++o1Kfu4gkRqY3ann//XBjlnnzwvOiRWFsUdeuYeKyY44Jj6OOyu58OpnQCVURkSxZvx7+539C2M+bF+a3aWgIN1wfNqwp7I8+GvbaK7e16ISqiEiW9OoVphy++uowI+X69fDUU2HenZ49w9xAJ58c7r510EHwL/8C99wTbjXYXHV1OLldUBCeq6tzV7OO3EVEdtKmTeFovrEr59lnw7UGAAMHhqP6rl1Dv37zid92ZJy9umVERCLS0ACvvNLUjTNvHnzwQfp12zvOXuEuItJBbN0KXbqkvwlLey8gU5+7iEgHUVDQ+jj7XF1ApnAXEcmDfF9ApnAXEcmDfF9A1iU3bysiIi1VVubvamAduYuIxJDCXUQkhhTuIiIxpHAXEYkhhbuISAxFdoWqmdUBq1KLewHrIikketr25Ery9id522Hntr/M3YvbWimycN+mCLOaTC6njSNtezK3HZK9/UnedsjP9qtbRkQkhhTuIiIx1FHCfUbUBURI255cSd7+JG875GH7O0Sfu4iIZFdHOXIXEZEsijTczWysmb1uZsvNbEqUteSamQ0ws7lmtsTMFpvZv6Xae5vZk2a2LPW8Z9S15pKZFZrZS2b2aGp5PzNbkNr++8ysW9Q15oKZ9TKzB81saep34B+StO/N7N9Tv/evmdk9ZtY9rvvezH5vZmvN7LVmbWn3tQU3pTLwFTM7LFt1RBbuZlYITAfGAUOAM81sSFT15EEDMNndvwaMAC5Mbe8UYI67DwLmpJbj7N+AJc2WrwauT23/R8B5kVSVezcCf3X3A4BDCD+DROx7M+sPTAIq3P0goBA4g/ju+9uBsS3aWtvX44BBqcdE4DfZKiLKI/cjgeXuvsLdNwP3AidFWE9Oufu77v5i6usNhP/c/QnbfEdqtTuAb0VTYe6ZWQlwAjAztWzAscCDqVViuf1mtjtwDPA7AHff7O7rSdC+J0wv3sPMugBFwLvEdN+7+zzgwxbNre3rk4A7PZgP9DKzfbNRR5Th3h9Y02y5NtUWe2ZWDgwDFgD7uPu7EP4AAHtHV1nO3QD8CGi8Y2QfYL27N6SW4/o7MBCoA25LdUnNNLNdSci+d/e3gWuB1YRQ/zuwkGTs+0at7euc5WCU4W5p2mI/dMfMdgMeAi5294+jridfzOxEYK27L2zenGbVOP4OdAEOA37j7sOAT4lpF0w6qf7lk4D9gH7AroTuiJbiuO/bkrP/A1GGey0woNlyCfBORLXkhZl1JQR7tbv/MdX8fuPHsNTz2qjqy7GvA+PNbCWhC+5YwpF8r9RHdYjv70AtUOvuC1LLDxLCPin7fjTwlrvXufsW4I/AUSRj3zdqbV/nLAejDPcXgEGpM+bdCCdYZkVYT06l+pd/Byxx9+uavTQLOCf19TnAn/JdWz64+0/cvcTdywn7+r/dvRKYC5ySWi2W2+/u7wFrzGz/VNMo4P9IyL4ndMeM
MLOi1P+Dxu2P/b5vprV9PQs4OzVqZgTw98bum53m7pE9gOOBN4A3galR1pKHbT2a8HHrFeDl1ON4Qr/zHGBZ6rl31LXm4WcxEng09fVA4HlgOfAAsEvU9eVomw8FalL7/xFgzyTte+DnwFLgNeAuYJe47nvgHsK5hS2EI/PzWtvXhG6Z6akMfJUwoigrdegKVRGRGNIVqiIiMaRwFxGJIYW7iEgMKdxFRGJI4S4iEkMKdxGRGFK4i4jEkMJdRCSG/h/j8TqB7FUpaAAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xb141d30>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the Sc score for every candidate value in Ks; the best\n",
    "# model/parameter is the one with the highest score.\n",
    "# NOTE(review): Ks is defined in an earlier cell - presumably the candidate\n",
    "# cluster counts, not PCA dimensions as the old comment said; confirm.\n",
    "plt.plot(Ks, Sc_scores, 'bo-')\n",
    "plt.xlabel('K')\n",
    "plt.ylabel('Sc score')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "两个指标的变化趋势类似"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MiniBatchKMeans(batch_size=100, compute_labels=True, init='k-means++',\n",
       "        init_size=None, max_iter=100, max_no_improvement=10, n_clusters=4,\n",
       "        n_init=3, random_state=None, reassignment_ratio=0.01, tol=0.0,\n",
       "        verbose=0)"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit the final clustering model. `colors` provides one plot colour per\n",
    "# cluster for the visualisation cells below.\n",
    "colors = ['b','g','r','k','c','m','y','#e24fff','#524C90','#845868']\n",
    "\n",
    "n_clusters = 4\n",
    "# Fix the seed so that cluster ids are stable across runs: later cells\n",
    "# select a cluster by its numeric label (train_lable == 1).\n",
    "mb_kmeans = MiniBatchKMeans(n_clusters=n_clusters, random_state=0)\n",
    "mb_kmeans.fit(train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[3 3 3 ... 3 3 0]\n"
     ]
    }
   ],
   "source": [
    "# Cluster assignment of every sample the model was fitted on (`train`),\n",
    "# together with the fitted centroids.\n",
    "cents = mb_kmeans.cluster_centers_  # centroids\n",
    "train_lable = mb_kmeans.labels_\n",
    "print(train_lable)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([    6,     8,    10, ..., 13400, 13409, 13411], dtype=int64)"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Positional row indices of the samples assigned to cluster 1\n",
    "index = np.flatnonzero(train_lable == 1)\n",
    "index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAD8CAYAAABXe05zAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAADXlJREFUeJzt3X+I5Hd9x/HXK3eRmKhMIdPS3iXdCJI2BOzJkNgeCE2kxh/ov0lRqC3cP60dB0Fi/yjbP/pfcTN/iHBcYgVTQ4kRSkiNAQ1WsFdnk7RNchHCNZozkZtQpkbBpnFf/WNms7Pr7M73LvPd73x2nw9Yst/Nl9k3w92T77535r5OIgBAOa5oegAAwKUh3ABQGMINAIUh3ABQGMINAIUh3ABQGMINAIUh3ABQGMINAIU5WseDXnvttVlZWanjoQHgQFpfX38lSbvKubWEe2VlRYPBoI6HBoADyfYPq57LqgQACkO4AaAwhBsACkO4AaAwhBsACkO4AeDN2Hkzmn24OQ3hBoDLtboq9XpbsU7Gx6urtX5bwg0AlyORRiOp39+Kd683Ph6Nar3yruUNOABw4NnS2tr4835//CFJ3e7463Z937qOmwV3Op3wzkkAh0IiXTG1vNjYuKxo215P0qlyLqsSALhcm+uRadM775oQbgC4HNM77W53fKXd7W7fedek0o7bdkvSGUk3S4qkP03yvdqmAoBlZ0ut1vad9ubOu9Vqfsdt+8uS/iXJGdtvkXR1ktFu57PjBnBoJNsjvfO4okvZcc+94rb9Dknvk/Qn45nymqTXLnkqADiIdka6xivtTVV23O+UNJT0JdtP2j5j+5qa5wIA7KJKuI9Keo+kLyY5Iennku7eeZLtU7YHtgfD4XDBYwIANlUJ9wVJF5KcnRw/qHHIt0lyOkknSafdrnT3HQDAZZgb7iQ/kfSi7RsnX7pd0rO1TgUA2FXVt7x/StL9k1eUnJf0yfpGAgDspVK4kzwlqdLLVAAA9eKdkwBQGMINAIUh3ABQGMINAIUh3ABQGMINAIUh3ABQGMINAIUh3ABQGMINAIUh3ABQGMINAIUh3ABQGMINAIUh3ABQGMINAIUh3ABQGMINAIUh3ABQGMINAIUh3ABQGMINAIU5WuUk2y9IelXSLyW9nqRT51AAgN1VCvfEHyZ5pbZJAACVsCoBgMJUDXckfdP2uu1TdQ4EANhb1VXJySQv2f51SY/Zfi7Jd6ZPmAT9lCRdf/31Cx4TALCp0hV3kpcm/70o6euSbplxzukknSSddru92CkBAG+YG27b19h+++bnkv5I0tN1DwYAmK3KquQ3JH3d9ub5/5DkG7VOBQDY1dxwJzkv6d37MAsAoAJeDggAhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhakcbttHbD9p++E6BwIA7O1Srri7ks7VNQgAoJpK4bZ9XNKHJZ2pdxwAwDxVr7jvkfRZSRu7nWD7lO2B7cFwOFzIcACAXzU33LY/IulikvW9zktyOkknSafdbi9sQADAdlWuuE9K+qjtFyQ9IOk221+pdSoAwK7mhjvJ55IcT7Ii6U5J30ry8donAwDMxOu4AaAwRy/l5CSPS3q8lkkAAJVwxQ0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFCYueG2fZXtf7P977afsf03+zEYAGC2oxXO+V9JtyX5me0rJX3X9j8n+deaZwMAzDA33Eki6WeTwysnH6lzKADA7irtuG0fsf2UpIuSHktydsY5p2wP
bA+Gw+Gi5wQATFQKd5JfJvk9Sccl3WL75hnnnE7SSdJpt9uLnhMAMHFJrypJMpL0uKQ7apkGADBXlVeVtG23Jp+/VdL7JT1X92AAgNmqvKrkNyV92fYRjUP/j0kerncsAMBuqryq5D8kndiHWQAAFfDOSQAoDOEGgMIQbgAoDOEGgMIQbgAoDOEGgMIQbiylZO9j4DAj3Fg6q6tSr7cV62R8vLra5FTA8iDcWCqJNBpJ/f5WvHu98fFoxJU3IFV7yzuwb2xpbW38eb8//pCkbnf8dbu52YBl4dRwCdPpdDIYDBb+uDg8EumKqZ8HNzaINg422+tJOlXOZVWCpbO5Hpk2vfMGDjvCjaUyvdPudsdX2t3u9p03cNix48ZSsaVWa/tOe3Pn3WqxLgEkdtxYUsn2SO88Bg4adtwo3s5IE21gC+EGgMIQbgAoDOEGgMIQbgAoDOEGgMIQbgAozNxw277O9rdtn7P9jO3ufgwGAJityjsnX5f0mSRP2H67pHXbjyV5tubZAAAzzL3iTvJykicmn78q6ZykY3UPBgCY7ZJ23LZXJJ2QdLaOYQAA81UOt+23SfqapE8n+emM/3/K9sD2YDgcLnJGAMCUSuG2faXG0b4/yUOzzklyOkknSafdbi9yRgDAlCqvKrGkeyWdS/L5+kcCAOylyhX3SUmfkHSb7acmHx+qeS4AwC7mvhwwyXcl8Y9qAsCS4J2TAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhSHcAFAYwg0AhZkbbtv32b5o++n9GAgAsLcqV9x/L+mOmucAAFQ0N9xJviPpv/dhFgBABey4AaAwCwu37VO2B7YHw+FwUQ8LANhhYeFOcjpJJ0mn3W4v6mEBADuwKgGAwlR5OeBXJX1P0o22L9j+s/rHAgDs5ui8E5LctR+DAACqYVUCAIUh3HtIsucxADSBcO9i9fFV9R7tvRHrJOo92tPq46vNDgbg0CPcMyTR6Bcj9c/234h379Ge+mf7Gv1ixJU3gEbN/eXkYWRbax9YkyT1z/bVP9uXJHVv7WrtA2uy3eR4AA45rrh3MR3vTUQbwDIg3LvYXI9Mm955A0BTCPcM0zvt7q1dbfz1hrq3drftvAGgKey4Z7Ct1lWtbTvtzbVJ66oW6xIAjXIdV4+dTieDwWDhj7vfkmyL9M5jAFgU2+tJOlXOZVWyh52RJtoAlgHhBoDCEG4AKAzhBoDCEG4AKAzhBoDCEG4AKAzhBoDCEG4AKAzhBoDCEG4AKAzhBoDCLF24uUEvAOytUrht32H7B7aft313XcOsrq6q19txg95eT6urq3V9SwAoztxw2z4i6QuSPijpJkl32b5p0YMk0Wg0Ur/ffyPevV5P/X5foxE36AWATVVupHCLpOeTnJck2w9I+pikZxc5iG2trU1u0Nvvq9+f3KC329XaGvd6BIBNVVYlxyS9OHV8YfK1bWyfsj2wPRgOh5c1zHS8NxFtANiuSrhnVfNX9hZJTifpJOm02+3LGmZzPTJteucNAKgW7guSrps6Pi7ppUUPMr3T7na72tjYULfb3bbzBgBU23F/X9K7bN8g6ceS7pT0x4sexLZarda2nfbm2qTV4ga9ALCp0s2CbX9I0j2Sjki6L8nf7nX+m7lZMDfoBXAYXcrNgqtccSvJI5IeeVNTVcQNegFgb0v3zkkAwN4INwAUhnADQGEINwAUhnADQGEINwAUhnADQGEqvQHnkh/UHkr64Zt8mGslvbKAcQ4Kno8tPBfb8XxsKfm5+O0klf6hp1rCvQi2B1XfRXQY8Hxs4bnYjudjy2F5LliVAEBhCDcAFGaZw3266QGWDM/HFp6L7Xg+thyK52Jpd9wAgNmW+YobADDDUobb9h22f2D7edt3Nz1PU2xfZ/vbts/ZfsZ2t+mZloHtI7af
tP1w07M0yXbL9oO2n5v8Gfn9pmdqku3e5O/J07a/avuqpmeqy9KF2/YRSV+Q9EFJN0m6y/ZNzU7VmNclfSbJ70p6r6Q/P8TPxbSupHNND7EE+pK+keR3JL1bh/g5sX1M0l9K6iS5WeObvtzZ7FT1WbpwS7pF0vNJzid5TdIDkj7W8EyNSPJykicmn7+q8V/MY81O1SzbxyV9WNKZpmdpku13SHqfpHslKclrSUbNTtW4o5LeavuopKtVw71xl8UyhvuYpBenji/okMdKkmyvSDoh6WyzkzTuHkmflbTR9CANe6ekoaQvTdZGZ2xf0/RQTUnyY0l/J+lHkl6W9D9JvtnsVPVZxnDPulfZoX7pi+23SfqapE8n+WnT8zTF9kckXUyy3vQsS+CopPdI+mKSE5J+Lukw/z7o1zT+yfwGSb8l6RrbH292qvosY7gvSLpu6vi4DvCPPPPYvlLjaN+f5KGm52nYSUkftf2Cxiu022x/pdmRGnNB0oUkmz+BPahxyA+r90v6ryTDJP8n6SFJf9DwTLVZxnB/X9K7bN9g+y0a/4LhnxqeqREe3yn5Xknnkny+6XmaluRzSY4nWdH4z8W3khzYq6q9JPmJpBdt3zj50u2Snm1wpKb9SNJ7bV89+Xtzuw7wL2sr3eV9PyV53fZfSHpU498M35fkmYbHaspJSZ+Q9J+2n5p87a+SPNLgTFgen5J0/+QC57ykTzY8T2OSnLX9oKQnNH411pM6wO+i5J2TAFCYZVyVAAD2QLgBoDCEGwAKQ7gBoDCEGwAKQ7gBoDCEGwAKQ7gBoDD/D7LPyYAh0bZLAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xbdc9d30>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot every centroid using its first two coordinates, one colour per cluster\n",
    "for i in range(n_clusters):\n",
    "    cx, cy = cents[i, :2]\n",
    "    plt.scatter(cx, cy, marker='x', color=colors[i], linewidths=12)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5,0,'cluster result')"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAAEKCAYAAAAFJbKyAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAEYdJREFUeJzt3X/MnWV9x/H3hwLi5g/QVsdatjJsnOgUtUEiixp0gLgJMeIwKlVZmBlzmGxuOBdQlEyjk6kTMzJQIE5k4CYzbKThh0ynQPkp0DE6QOhAKRYQNLKVfffHuQoP5Wl7rvY5z3lOn/crOXnu+3tf536+PYF+ev84152qQpKkYe007gYkSZPF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1GXncTcwCgsXLqylS5eOuw1JmijXXHPN/VW1aGvjdsjgWLp0KatWrRp3G5I0UZL8YJhxnqqSJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkddkhvzkuSRt96zWvHXcLc8Zrr/jWjOzHIw5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxWnVtd3uOvk3xt3CnPErJ35/3C1II+cRhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4jD44kC5Jcl+SbbX3vJFcmuS3J15Ls2upPa+tr2valU/bxoVa/Nckho+5ZkrR5s3HEcTywesr6J4FTq2oZ8ABwTKsfAzxQVS8ATm3jSLIvcBTwYuBQ4LQkC2ahb0nSNEYaHEmWAG8C/q6tBzgIOL8NOQs4oi0f3tZp21/fxh8OnFtVj1bVHcAaYP9R9i1J2rxRH3H8NfCnwP+19ecCD1bVhra+FljclhcDdwO07Q+18Y/Xp3nP45Icm2RVklXr1q2b6T+HJKkZWXAk+W3gvqq6Zmp5mqG1lW1bes8TharTq2p5VS1ftGhRd7+SpOGM8gmABwJvTnIYsBvwLAZHILsn2bkdVSwB7mnj1wJ7AWuT7Aw8G1g/pb7R1PdIkmbZyI44qupDVbWkqpYyuLh9aVW9A7gMeGsbtgL4Rlu+sK3Ttl9aVdXqR7W7rvYGlgFXjapvSdKWjeOZ438GnJvk48B1wBmtfgZwTpI1DI40jgKoqpuTnAfcAmwAjquqx2a/bUkSzFJwVNXlwOVt+XamuSuqqn4OHLmZ958CnDK6DiVJw/Kb45KkLgaHJKmLwSFJ6jKOi+Nzwis/ePa4W5gzrvnU0eNuQdIE8YhDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1MXgkCR1MTgkSV0MDklSF4NDktTF4JAkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXUYWHEl2S3JVkhuS3Jzko62+d5Irk9yW5GtJdm31p7X1NW370in7+lCr35rkkFH1LEnaulEecTwKHFRVLwP2Aw5NcgDwSeDUqloGPAAc08YfAzxQVS8ATm3jSLIvcBTwYuBQ4LQkC0bYtyRpC0YWHDXwSFvdpb0KOAg4v9XPAo5oy4e3ddr21ydJq59bVY9W1R3AGmD/UfUtSdqykV7jSLIgyfXAfcBK4L+AB6tqQxuyFljclhcDdwO07Q8Bz51an+Y9U3/XsUlWJVm1bt26UfxxJEmMODiq6rGq2g9YwuAo4UXTDWs/s5ltm6tv+rtOr6rlVbV80aJF29qyJGkrZuWuqqp6ELgcOADYPcnObdMS4J62vBbYC6Btfzawfmp9mvdIkmbZKO+qWpRk97b8
dOANwGrgMuCtbdgK4Btt+cK2Ttt+aVVVqx/V7rraG1gGXDWqviVJW7bz1odssz2Bs9odUDsB51XVN5PcApyb5OPAdcAZbfwZwDlJ1jA40jgKoKpuTnIecAuwATiuqh4bYd+SpC0YWXBU1Y3Ay6ep3840d0VV1c+BIzezr1OAU2a6R0lSP785LknqYnBIkroYHJKkLgaHJKmLwSFJ6jJUcCS5ZJiaJGnHt8XbcZPsBvwCsDDJHjwx/cezgF8ecW+SpDloa9/j+H3gAwxC4hqeCI6fAF8YYV+SpDlqi8FRVZ8FPpvk/VX1+VnqSZI0hw31zfGq+nySVwNLp76nqs4eUV+SpDlqqOBIcg6wD3A9sHGeqAIMDkmaZ4adq2o5sG+brVaSNI8N+z2Om4BfGmUjkqTJMOwRx0LgliRXAY9uLFbVm0fSlSRpzho2OD4yyiYkSZNj2LuqvjXqRiRJk2HYu6oeZnAXFcCuwC7AT6vqWaNqTJI0Nw17xPHMqetJjmCap/hJknZ82zQ7blX9E3DQDPciSZoAw56qesuU1Z0YfK/D73RI0jw07F1VvzNleQNwJ3D4jHcjSZrzhr3G8Z5RNyJJmgzDPshpSZJ/THJfkh8luSDJklE3J0mae4a9OP4l4EIGz+VYDPxzq0mS5plhg2NRVX2pqja015eBRSPsS5I0Rw0bHPcneWeSBe31TuDHo2xMkjQ3DRsc7wXeBvwQuBd4K+AFc0mah4a9HfdjwIqqegAgyXOATzMIFEnSPDLsEcdLN4YGQFWtB14+mpYkSXPZsMGxU5I9Nq60I45hj1YkSTuQYf/y/yvg35Ocz2CqkbcBp4ysK0nSnDXsN8fPTrKKwcSGAd5SVbeMtDNJ0pw09OmmFhSGhSTNc9s0rbokaf4aWXAk2SvJZUlWJ7k5yfGt/pwkK5Pc1n7u0epJ8rkka5LcmOQVU/a1oo2/LcmKUfUsSdq6UR5xbAD+uKpeBBwAHJdkX+AE4JKqWgZc0tYB3ggsa69jgS/C43dwnQS8isFTB0+aeoeXJGl2jSw4qureqrq2LT8MrGYwQeLhwFlt2FnAEW35cODsGvgesHuSPYFDgJVVtb59l2QlcOio+pYkbdmsXONIspTBFwavBJ5fVffCIFyA57Vhi4G7p7xtbattri5JGoORB0eSZwAXAB+oqp9saeg0tdpCfdPfc2ySVUlWrVu3btualSRt1UiDI8kuDELjK1X19Vb+UTsFRft5X6uvBfaa8vYlwD1bqD9JVZ1eVcuravmiRc74LkmjMsq7qgKcAayuqs9M2XQhsPHOqBXAN6bUj253Vx0APNROZV0MHJxkj3ZR/OBWkySNwSjnmzoQeBfw/STXt9qfA58AzktyDHAXcGTbdhFwGLAG+Blt2vaqWp/kY8DVbdzJbZJFSdIYjCw4qurbTH99AuD104wv4LjN7OtM4MyZ606StK385rgkqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuowsOJKcmeS+JDdNqT0nycokt7Wfe7R6knwuyZokNyZ5xZT3rGjjb0uyYlT9SpKGM8ojji8Dh25SOwG4pKqWAZe0dYA3Asva61jgizAIGuAk4FXA/sBJG8NGkjQeIwuOqroCWL9J+XDgrLZ8FnDElPrZNfA9YPckewKHACuran1VPQCs5KlhJEmaRbN9jeP5VXUvQPv5vFZfDNw9ZdzaVttcXZI0JnPl4nimqdUW6k/dQXJsklVJVq1bt25Gm5MkPWG2g+NH7RQU7ed9rb4W2GvKuCXAPVuoP0VVnV5Vy6tq+aJFi2a8cUnSwGwHx4XAxjuj
VgDfmFI/ut1ddQDwUDuVdTFwcJI92kXxg1tNkjQmO49qx0m+CrwOWJhkLYO7oz4BnJfkGOAu4Mg2/CLgMGAN8DPgPQBVtT7Jx4Cr27iTq2rTC+6SpFk0suCoqrdvZtPrpxlbwHGb2c+ZwJkz2JokaTvMlYvjkqQJYXBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKnLyB7kJGnbHPj5A8fdwpzxnfd/Z9wtaBoecUiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSJK6GBySpC4GhySpi8EhSepicEiSuhgckqQuBockqYvBIUnqYnBIkroYHJKkLhMTHEkOTXJrkjVJThh3P5I0X01EcCRZAHwBeCOwL/D2JPuOtytJmp8mIjiA/YE1VXV7Vf0PcC5w+Jh7kqR5aVKCYzFw95T1ta0mSZplO4+7gSFlmlo9aUByLHBsW30kya0j72r7LQTuH3cT+fSKcbcwU8b/eZ403X+qE2vsn2f+aIf5PMf+WQKQrX6evzrMbiYlONYCe01ZXwLcM3VAVZ0OnD6bTW2vJKuqavm4+9hR+HnOLD/PmbOjfZaTcqrqamBZkr2T7AocBVw45p4kaV6aiCOOqtqQ5A+Bi4EFwJlVdfOY25KkeWkiggOgqi4CLhp3HzNsok6tTQA/z5nl5zlzdqjPMlW19VGSJDWTco1DkjRHGBxj4hQqMyfJmUnuS3LTuHuZdEn2SnJZktVJbk5y/Lh7mmRJdktyVZIb2uf50XH3NBM8VTUGbQqV/wR+i8GtxlcDb6+qW8ba2IRK8hrgEeDsqnrJuPuZZEn2BPasqmuTPBO4BjjC/za3TZIAv1hVjyTZBfg2cHxVfW/MrW0XjzjGwylUZlBVXQGsH3cfO4Kqureqrm3LDwOrcZaGbVYDj7TVXdpr4v+1bnCMh1OoaM5LshR4OXDleDuZbEkWJLkeuA9YWVUT/3kaHOOx1SlUpHFK8gzgAuADVfWTcfczyarqsaraj8GMF/snmfjTqQbHeGx1ChVpXNq5+AuAr1TV18fdz46iqh4ELgcOHXMr283gGA+nUNGc1C7mngGsrqrPjLufSZdkUZLd2/LTgTcA/zHerrafwTEGVbUB2DiFymrgPKdQ2XZJvgp8F3hhkrVJjhl3TxPsQOBdwEFJrm+vw8bd1ATbE7gsyY0M/sG4sqq+Oeaetpu340qSunjEIUnqYnBIkroYHJKkLgaHJKmLwSFJ6mJwSECSjyT5k2143+5J/mAUPc2EJHcmWTjX+9RkMTik7bM70PUXcgY2+/9emz15pnX3KW2OwaF5J8nRSW5sz0g4Z5rtlydZ3pYXJrmzLb+4PVvh+vb+ZcAngH1a7VNt3AeTXN3GfLTVlrZnXJwGXMuTp5zZeGRwYpJvA0cm2SfJvya5Jsm/Jfn1Nu7IJDe13q9otXcn+Zsp+/pmktdt8sd6Sp/StpqYZ45LMyHJi4EPAwdW1f1JntPx9vcBn62qr7SpYhYAJwAvaZPYkeRgYBmDqfMDXNieF3IX8ELgPVW1uX/5/7yqfrPt5xLgfVV1W5JXAacBBwEnAodU1X9vnMpiSE/qU9oeBofmm4OA86vqfoCq6nmOx3eBDydZAny9/aW+6ZiD2+u6tv4MBkFyF/CDrTzA52vw+My0rwb+Ycr+n9Z+fgf4cpLzACcg1FgYHJpvwtansN/AE6dxd9tYrKq/T3Il8Cbg4iS/B9w+zf7/sqr+9knFwbMtfrqV37tx+07Ag9MdHVTV+9oRyJuA65Pst0m/T+pZGgWvcWi+uQR4W5LnAmzmVNWdwCvb8ls3FpP8GnB7VX2OwWzGLwUeBp455b0XA+9tRw0kWZzkeT0Ntudf3JHkyLaPJHlZW96nqq6sqhOB+xlcK7kT2C/JTkn2YnCabFOb9iltM484NK9U1c1JTgG+leQx
BqeU3r3JsE8D5yV5F3DplPrvAu9M8r/AD4GTq2p9ku8kuQn4l6r6YJIXAd9tp5keAd4JPNbZ6juALyb5CwaPGz0XuAH4VLsoHwYheEMbfwfwfeAmBhffN/1z/3jTPjv7kR7n7LiSpC6eqpIkdTE4JEldDA5JUheDQ5LUxeCQJHUxOCRJXQwOSVIXg0OS1OX/Aa93brN4A67AAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xce20da0>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import seaborn as sns\n",
    "sns.countplot(train_lable)\n",
    "plt.xlabel('cluster result')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(3803, 1)"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x1 = train.iloc[index].iloc[:,[0]]\n",
    "x1.shape\n",
    "#x1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2],\n",
       "       [0],\n",
       "       [5],\n",
       "       ...,\n",
       "       [1],\n",
       "       [4],\n",
       "       [6]], dtype=int64)"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x12=np.array(x1)\n",
    "x12"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([2], dtype=int64)"
      ]
     },
     "execution_count": 66,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x12[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4369, 1)"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x2 = train.iloc[index].iloc[:,[1]]\n",
    "x2.shape\n",
    "#x2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 1, 1, ..., 1, 1, 1])"
      ]
     },
     "execution_count": 52,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y_i = train_lable[index]\n",
    "y_i.shape\n",
    "y_i"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAeYAAAEVCAYAAAA1lUZ4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzt3XuUXHWV6PHvzjuE5pkQMYSJEkBzUZrQMjoDJLwicKPiqIzIjagwcWYBAzPIDMQZIVe9gvJQkyyuIAqOjA5jUMDJIBFwBeZyHbpDopA4BAzvXAgP8yBAh87v/nGqSYdUpavSXVWnqr6ftWpV1elfndonZ6V3n3N+++xIKSFJkvJhSL0DkCRJW5mYJUnKEROzJEk5YmKWJClHTMySJOWIiVmSpBwxMUtqPRFTiFhGxCYiXiDiG/UOSeplYpbUikYB/wR0ADcDXyDi2PqGJGWG1TsASaq5lJYCSwGIuAv4K2CveoYk9Qrv/CWpZUXsDtwLjAYOJaVNdY5I8ohZUovKkvKdwN7A0SZl5YXXmCW1nojdgMXAgcDpwOuFZVLdmZgltaKpwPuAPYF7gKeAv61rRFKB15glScoRj5glScoRE7MkSTliYpYkKUdMzJIk5YiJWZKkHDExS5KUIyZmSZJyxMQsSVKOmJglScoRE7MkSTliYpYkKUdMzJIk5YiJWZKkHDExS5KUIyZmSZJyxMQsSVKOmJglScqRfhNzRIyKiP+MiOUR8XBEzC0sf0dE/DoiVkXEv0TEiOqHK0lScyvniPl14NiU0qFAO3BiRLwfuBy4OqV0IPAycGb1wpQkqTX0m5hTZmPh7fDCIwHHAj8pLL8ROKUqEUqS1EKGlTMoIoYCXcBkYAHwGPCHlNIbhSFPAxNKfHY2MBtgzJgxh7/rXe8aaMyS1FK6urpeSCmNq3ccqo2yEnNKqQdoj4g9gJ8C7y42rMRnrwWuBejo6EidnZ07GaoktaaIeKLeMah2KpqVnVL6A/Ar4P3AHhHRm9j3A54d3NAkSWo95czKHlc4UiYiRgPHAyuBe4CPF4adAdxarSAlSWoV5ZzK3he4sXCdeQhwc0rp5xGxAvhxRHwFeBC4vopxSpLUEvpNzCml3wCHFVn+e+CIagQlSVKr8s5fkiTliIlZkqQcMTFLkpQjJmZJknLExCxJUo6YmCVJyhETsyRJOWJiliQpR0zMkiTliIlZkqQcMTFLkpQjJmZJknLExCxJUo6YmCVJyhETsyRJOWJiliQpR0zMkiTliIlZkqQcMTFLkpQjJmZJknLExCxJUo6YmCVJyhETsyRJOWJiliQpR0zMkiTliIlZkqQcMTFLkpQj/SbmiJgYEfdExMqIeDgizissvzQinomIZYXHydUPV5Kk5jasjDFvABeklJZGRBvQFRGLCz+7OqV0RfXCkySptfSbmFNKa4A1hdcbImIlMKHagUmS1IoqusYcEZOAw4BfFxadExG/iYjvRcSegxybJEktp+zEHBG7AguB81NK64FrgAOAdrIj6itLfG52RHRGROfatWsHIWRJkppXWYk5IoaTJeWbUkq3AKSUnksp9aSUtgDXAUcU+2xK6dqUUkdKqWPcuHGDFbckSU2pnFnZAVwPrEwpXdVn+b59hn0UeGjww5MkqbWUMyv7T4FZwG8jYllh2RzgtIhoBxLwOPD5qkQoSVILKWdW9n1AFPnRosEPR5Kk1uadvyRJyhETsyRJOWJiliQpR0zMUt6sWAHt7bDLLjB2LFx4Yb0jklRDJmYpb157DWbNgs5OOPVUuOIKuPvuekclqUbKKZeSVEtTp2YPgOOOg2uugZdeqm9MkmrGI2Ypr9atg7lzYfJkONmuqlKr8IhZyqN162DGDHjxRViyJLveLKkleMQs5c369XDCCbBqFdx0E4wcmS2T1BJMzFLeLF0KDzwAL78MxxwDEyfCVVf1/zlJTcFT2VLeTJ8O
KdU7Ckl14hGzJEk5YmKWJClHTMySJOWIiVmSpBwxMUuSlCMmZkmScsTELElSjpiYJUnKEROzJEk5YmKWJClHTMySJOWIiVmSpBwxMUuSlCMmZkmScsTELElSjpiYJUnKkX4Tc0RMjIh7ImJlRDwcEecVlu8VEYsjYlXhec/qhytJUnMr54j5DeCClNK7gfcDZ0fEFOAi4K6U0oHAXYX3kiRpAPpNzCmlNSmlpYXXG4CVwATgI8CNhWE3AqdUK0hJklpFRdeYI2IScBjwa2B8SmkNZMkb2Gewg5MkqdWUnZgjYldgIXB+Sml9BZ+bHRGdEdG5du3anYlRkqSWUVZijojhZEn5ppTSLYXFz0XEvoWf7ws8X+yzKaVrU0odKaWOcePGDUbMUnWsWAHt7bDLLjB2LFx4Yb0j6l8jxixph8qZlR3A9cDKlNJVfX50G3BG4fUZwK2DH55UQ6+9BrNmQWcnnHoqXHEF3H13vaPasUaMWdIODStjzJ8Cs4DfRsSywrI5wGXAzRFxJvAk8InqhCjVyNSp2QPguOPgmmvgpZfqG1N/GjFmSTsUKaWafVlHR0fq7Oys2fdJO2XdOjjqKHj1VVi+PDtNnHeNGLPKFhFdKaWOeseh2ijniFlqHevWwYwZ8OKLsGRJYyS4RoxZUkneklPqtX49nHACrFoFN90EI0dmy/KsEWOWtEMmZqnX0qXwwAPw8stwzDEwcSJcdVX/n6unRoxZ0g55KlvqNX061HDOxaBoxJgl7ZBHzJIk5YiJWZKkHDExS5KUIyZmSZJyxMQsSVKOmJglScoRE7Ok7dm1SqobE7Ok7dm1SqobbzAiaXt2rZLqxiNmSaWtWwdz58LkyXDyyfWORmoJHjFLKs6uVVJdeMQsaXt2rZLqxsQsaXt2rZLqxlPZkrZn1yqpbjxiliQpR0zMkiTliIlZkqQcMTFLkpQjJmZJknLExCxJUo6YmCVJyhETsyRJOWJiliQpR0zMkiTlSL+JOSK+FxHPR8RDfZZdGhHPRMSywsN+cJIkDYJyjphvAE4ssvzqlFJ74bFocMOSJKk19ZuYU0pLgJdqEIskSS1vINeYz4mI3xROde9ZalBEzI6IzojoXLt27QC+TpKk5rezifka4ACgHVgDXFlqYErp2pRSR0qpY9y4cTv5dVILWbEC2tthl11g7Fi48MJ6RySphnYqMaeUnksp9aSUtgDXAUcMblhSC3vtNZg1Czo74dRT4Yor4O676x2VpBrZqcQcEfv2eftR4KFSYyVVaOpUuOACmDIFjjsuW/aS0zykVjGsvwER8SNgOjA2Ip4GLgGmR0Q7kIDHgc9XMUapNa1bB3PnwuTJcLIViVKr6Dcxp5ROK7L4+irEIqnXunUwYwa8+CIsWZJdb5bUErzzl5Q369fDCSfAqlVw000wcmS2TFJLMDFLebN0KTzwALz8MhxzDEycCFddVe+oJNWIiVnVYcnPzps+HVLa9nHppQNfr/tEaggmZlWHJT/54z6RGkK/k7+knTJ1avaArOTnmmss+ak394nUECKlVLMv6+joSJ2dnTX7PuXAunVw1FHw6quwfLmzi/PAfdJwIqIrpdRR7zhUGx4xq3os+ckf94mUe15jVnVY8pM/7hOpIZiYVR2W/OSP+0RqCJ7KVnX0lvwoP9wn1bdiBXzqU/DII9llgs9+Fr7xjXpHpXqLmAL8M3AQsAn4PimVrFf0iFmSBoslaSpuFPBPQAdwM/AFIo4tNdgjZkkaLJakqZiUlgJLAYi4C/grYK9Swz1ilqTBZmcwFROxO1mHxkeBRaWGecQsSYPJkjQVkyXlO4G9gaNJaVOpoR4xS9JgsSRNxUTsBiwGDgROB14vLCvKxCxJg8WSNBU3FXgfsCdwD/AU8LelBnsqW5IGiyVpKialXwFR7nCPmKW8qaQ9Yx5aOeYhBqmJmJilvKmkFjYPdbN5iEFqIp7KlvKmklrYPNTN5iEGqYnY9lHKq0raM+ahlWMeYmhS
tn1sLR4xS3lUSS1sHupm8xCD1CS8xizlTSW1sHmom81DDFITMTFLeVNJLWwe6mbzEIPURDyVrfLNn08696+BbF7CxjHj2XXD/yPKrs5TWSqphc1D3WweYpCaiEfMKtvTK/7ACt7F4o/M4/Uxe9L2ynP8/oN/We+wJKmpmJhVtuvG/QOHsIKp3z2HkcceBcC6lc/UOSpJai79JuaI+F5EPB8RD/VZtldELI6IVYXnPasbpvLgueey57anHiJ+fjsJuPKdC+oakyQ1m3KOmG8ATnzLsouAu1JKBwJ3Fd6ryY0fD+/mIUZ0HEpKiTOH/xPDJu1f77Akqan0m5hTSkuAt97G5yPAjYXXNwKnDHJcyqE/+28r+S2HwpYtPHL82Ty3eQ9OO2xlvcOSpKays9eYx6eU1gAUnvcpNTAiZkdEZ0R0rl27die/Tnlw6JIFDGULARz8ywX8Gx/ig1efVO+wJKmpVH3yV0rp2pRSR0qpY9y4cdX+Ou2EBQtg//3hbW+DL31pB5Uv8+dnP+zziCcer2Wo1VWtLkl2X5JUgZ1NzM9FxL4AhefnBy8k1VJXF5xzTva44gr48pfhpz+td1R1Uq0uSXZfklSBnU3MtwFnFF6fAdw6OOGo1m67LXv+3Ofg9NNhzBi4tVX35tSpcMEFMGVK1iUJBqdLUrXWK6kplVMu9SPgfuDgiHg6Is4ELgNOiIhVwAmF92pAb5ZAtUEE7Lrr1mUta906mDsXJk+Gk0/O/3olNZV+b8mZUjqtxI+OG+RYVAfjx2fP69dnlz83bNi6rCVVq0uS3Zcklck7f7W4mTOz5+9/P2sMtGkTfOhD9Y2pbqrVJcnuS5IqYGJuce97H3z72zBvXnYZdM4c+NjH6h1VnVSrS5LdlyRVIFINu8J0dHSkzs7Oqq1/wQK4/HLo7obZs7PLeXY+ktToIqIrpdRR7zhUG03T9rG37Ofyy+Htb8+qU9rb4c/+rN6RSZJUvqY5lW3ZjySpGTRNYrbsR5LUDJomMfct+0nJsh9JUmNqmsRs2Y8kqRk0zeSv3rKfr389m5Xd0mU/kqSG1VTlUpLUjCyXai1NcypbOZOjVodlt7WUpBwwMas6ctLq0LaWkhqNiVnVkZNWh9a3S2o0JmZVV51bHVrfLqnRmJhVPX1bHd5xR11aHVrfLqnRmJhVHTlpdWh9u6RG0zR1zMqZ3laHkLU6BLjkErj00pqGYX27pEbjEXODaLiSn+nTsyD7PnaQlKu5feeeC089lV1b/upXbQUqKd88Ym4Azd7Sstm3T5Iq4RFzA2j2kp9m3z5JqoSJuQE0e8lPs2+fJFXCxNwAmr3kp9m3T5IqYWJuAM1e8tPs2ydJlXDyVwNo9pKfZt8+SapErts+bu7ZzLQbptG1povunm5Wn7eaSXtMGnAcm3s2c/Bl01j9ehcM7ebcntV8a+6kkmU0CxZkM4a7u2H27OwOky1ZcrNiBXzqU/DII9ldvD77WfjGN+odldT0bPvYWnJ9KjsimHnQTD5y8EcGdb1LlwarfzGT94zI1jtvXumOQ3Yn6iMnHaMkqZnlOjEPGzKMOUfN4aC9DxrU9S76+TC4dw7Ht2frHT26dHmOpTx95KRjlCQ1swFdY46Ix4ENQA/wRqOcauktxRkxMnseM6Z0eY6lPEXUuWOUJDWzwZj8dUxK6YVBWE/N9JbidL+ePW/cWLo8p28pz9ixlvJs0zFqyZK6dIySpGaW61PZAL974Xe8uOlFAB576THWbFgz4HXOnAmM/R33L8vW+9roxzjyxOLrtZSnj5x0jJKkZjagWdkRsRp4GUjAd1JK1+5ofKWzsgFi7rbTn8849AxuOOWGygId4HrnzdtaynPWWfCVr7TorOxf/Wprp6hedegYJbUaZ2W3loEm5renlJ6NiH2AxcC5KaUlbxkzG5gNsP/++x/+xBNPlL3+apVLVWpT9ybGXzmejd0bAbj3s/dy5P5HFh2bl5jrztKqxub+yxUTc2sZ0Kns
lNKzhefngZ8CRxQZc21KqSOl1DFu3LiK1l+tcqlKDRkyhA/s9wH2222/fsfmJea6s7Sqsbn/pLrZ6cQcEWMioq33NTADeGiwAoPqlUtVatSwUdw5607+aPc/6ndsXmKuO0urGpv7T6qbgczKHg/8NLKLrcOAf04p3TEoUal5WFrV2Nx/Us3tdGJOKf0eOHQQY1GzsbSqsbn/pLpoyXKpnbFo1SJefvVlAO5/6n6WrVlWcmxeYq4rS6sam/tPqptcN7GA6pVLVeqtcRyw5wE8+tePljW2XjHXlaVVjc39lyvOym4tuU/MktTqTMytpeansjd1b6Lta23E3CDmBvc9ed+gjK00hlFz24hLs/V+5kv3saO/T049FYYNg6FD4eijYcuWwRnLbbdlHTQiYMiQrDFxzm3u2cyfXP8njPzKSGJu8PgfHq99ECtWQHt7ds1z7Fi48MLaxyBJVVLzxFxJTXAlYyvx4INDeP3RD7D7kGy9N95YupXjD38I//qv2RyY2bPh3nvhoosGPhbIbrx9/PFZu6opU7Ka0SuvHNjGVVku6rStsZXUxGqemCupCa5kbCXuXDQKfngnB4/P1jtyZOlWjtddlz1ffTUsWJC9Xrhw4GOBrI/k7bfDhz8MJ52ULXv22fI3pA5yUadtja2kJpb7WdnV0Nu2cWihWGyXXUq3cly7Nnved9/sbPOQIVkVyUDHbuPJJ2H+fBg+HC6+uOztaHnW2EpqQi2ZmHvbNva8kT2/sql0K8feu4g+80x2vXjLFth994GPfdOTT2ZHft3dcMcd2TVT9a9vje0dd1hjK6lp1CUxV1ITXMnYcs2cCUxexOPPZevtHns/hxxffL2f+1z2fMEFcPbZ2etTTim+3krGAvD001lS3rQpu7bc1pYty7m612lbYyupidWlXGogNcE7GluJStb78Y/Dz36WHQF/4APZpK4hJf6kqWQs3/wm/M3fbLts2rSshjTH6l6nbY2tWozlUq1lIPfK3inz/2Yxk+bBmh4YBRz9zgnc/HDx3hebujex64hdy263ePBl01j9ehcM7ebcntV8a+6kon2T1726jmAIKW2BgI/13My/nvuJkjEfPX0zt+8zjZ6xXfyfod08sW4179hzUtGxP/nJjrZ+W1N/cD4Pcj4AEyfC6tVZmVUxCxbA5ZdnZ7xnz84urdarJ3S6pLw/5qoW8/Tp7LC+rZXYnlFqOjU9lf3KK+/ium8nOg7ZlUmnj6BjPNz++2f4t8suKx5cBeVSS5cGq38xk/eMyMp45s0rXQK1bNlQ0ovvZHiMBrKZ06XGdnXBeecFx0yYyRG7Zev+xS/6DadfX/4yPPggHHwwHHssPPUUfOxjpWM455zsccUV2WdLxZsXjRhzQ7J0TGo6NU3M69YdzW+2zOCaX27glcP34YW3w0jgoLa2ouMrKZda9PNhcO8cjm/PynhGjy5dAnX3HbvCvFWMG7M3ACNGlB57223AlmH88PNzOP6wbN2LF/cbTr+++93s+YYb4M47s9d33bWDGMiuYZ9+OowZUzrevGjEmBuSpWNS06npqezNm7NEuHz5vTzzd0/z9Btw9NARvGPWrAGvu7fcacTI7HnMmNIlUL3Lo/BnyajR/Y9ta9t6GvaFFwYc7ptzlQ44YOvp69dfLy+GXXctHW9eNGLMDc3SMalp1PSIefjwbCbvQW87kJ+MG8aFI2BJTzff+/GPB7zu3nKn7kJy27ixdAlU7/JUuF3mq6/2P3b9+q2XNQejomm33bLnRx6Bnp7s9ciR5cWwYUPpePOiEWNuWJaOSU2lpol5993vYxfu465phzLppTe45b3Z8ide+K+Snym3XGrmTGDs77h/WZb8Xxv9GEeeWLyMZ+ZMYOp3ePmVVwDYPGEx+08vfn565szs+fLrf8f//U227kOOGniJ0Gc+kz2feWb2OxWyOU07iuH738+qgzZtgg99aEBfX3WNGHNDsnRMajo1L5c6eb8/58Zb/441wF7Ax4G3
TR/OP9zTXTzAKrVbfOvYthFtrL+4+C+0efPgr18a/BKh9nZYvjx7PWECPPFE6VnZ8+bB17+ezXA+6yz4ylfqNyu7XI0Yc8OxdKwlWC7VWmqemDd1tbOS67dZfu+993LkkduXQd1w2f/isxd/cZtly5cv573vfe92Yyspl9rUvYnxV44vqwwL4OOnbmbhbtNg3y4Y1s1j567mnXtNKjp2c89mpt0wja41XXT3dLP6vNVM2mPgYyuVp/IqSQNjYm4tNT2VvXz57ezBLoxgCOOY+ubyDx57bNHx49vaaBs5hLe9Z+vF1xnTphUdW0m5VCVlWD/8ISxcGBzETN65OVv35ZeXHl9J96VqdWqyVEmSGldNE/Mbb7yN+/k2s8/p4fFXuug9gJuy115Fx5909rmsf62HA87vePPuWX98wAFFx1ZSLlVJGdZ11wFbhnHbhXP45AnZuv/930uPr6T7UrU6NVmqJEmNqy73yv7RjyYwZkzQexL977/61ZJj29vb+Y8z/4MthRnU5335y0XHVVIuVYm+HaN6TwVv2DDw9VaTpUqS1LjqkpiPe+83mAZvHjGf+hd/UXLsd7/5Tf77LkPpnRN18kc/WnRcJeVSlejbMar3cnyJ+6HkhqVKktS4apqYhw5dy1CmcNc9p7M7vHnEPLREl4eTjj+eT844gbe/1sOWwpAhQ4uPraRcCsovw+rtGPX5L/6On92ZrfuPT9xxuVQl3Zeq0anJUiVJalw1n5X9UNcKXufVbZavPOww3rV06XbjD5wwgUeffXabZZccOYZL791YdP0DKZfqr7vUwvfs/LoHa2wlLFWSmoezsltMSqlmj8MPPzydz9BEdrD85mPRokWpmA+/733bjZ0/f37RsRd96aLtxv7yl78sOrYit96a0qhRKUFKESl1dOxw+E1ffDg9NPzQ9Aqj08bRe6ctF3xh4DE0oocfTunQQ1MaPTqlvfdO6Qs7/neYPz+liRNTGj8+pX/8x5S2bKlRnFIDADpTDX9X+6jvo6ansh98cDHvoIdhQHuf5R8ucW/fSXvuAcCokVvDPOecc4qO3W///bIT8/tvXXbyYNwzeMMGOP74bFrzlClZF58rryw6tKsLrvjqazw/YxZ3XdbJja+eSlzZot1+Kuh6ZHmXJG1V08S8ZcsenEfiz09PLLt06/L9S4z/1i/uJKXEBYsufnPZ6BK3xjr7zLNJPYkRZ414c9nkyZMHHvTpp8Ptt8OHPwwnnZQte8vp9V633QYPMpX33HABM/9uCv8xsoW7/VTQ9cjyLknaakDdpSLiROBbwFDguyml4o2V3+KWW3an72Xmi0o1Iia7GUh2Jifzl2edVXLs2LFj6X5x6609L7roonLCKc+TT8L8+TB8OFx8cdEh25QprV/HF3vm8swuk5nQyt1+yuh6ZHmXJG2100fMETEUWACcBEwBTouIKeV89rwx7+eQPu9nL1xYcuxlF13Ebn3eX/2d75Qc+4Mf/ADevvX9pz/96XLC6d+TT2ZHft3dWfeeEu2lekuSNjy9jjRjBnv0vMg3P9jC3X7K7HpkeZckbTWQU9lHAI+mlH6fUuoGfgzs8N6SEevZjTa+9cKd2yTbUkaNHMnff+1rjC5j7MEHH8wpHz8F+rROjMGYhvz001lS3rQpu7bc1pYtK2LmTGhjPZunn0D3w6v4VLqJ6R9s0W4/FXQ9srxLkvrY2VljZI2hvtvn/Sxg/o4+c/jhh6fRb5k5DaROSMW8dRyQFkwYWXRsW1vb9uP/ovh6K3L11SllB3JbH9OmlRy+8Nx7th9/ySUDj6PR3HNPRf8O3/52Svvtl9I++6Q0Z46zsqW+cFZ2Sz12uo45Ij4BfDCldFbh/SzgiJTSuW8ZNxuYXXh7CPDQTn1hYxgLvFDvIKqombevmbcN3L5Gd3BKKef3HNRgGcjkr6eBiX3e7wdsN105pXQtcC1ARHSmJi6Sd/saVzNvG7h9jS4iOusdg2pnINeYHwAOjIh3RMQI
4JPAbYMTliRJrWmnj5hTSm9ExDnAL8jKpb6XUnp40CKTJKkFDaiOOaW0CFhUwUeuHcj3NQC3r3E187aB29fomn371EdNm1hIkqQdq0s/ZkmSVFxNEnNEnBgR/xURj0bEIN4nMx8i4vGI+G1ELGuG2ZMR8b2IeD4iHuqzbK+IWBwRqwrPe9YzxoEosX2XRsQzhX24LCIa9j6qETExIu6JiJUR8XBEnFdY3vD7cAfb1hT7LyJGRcR/RsTywvbNLSx/R0T8urDv/qUw4VZNquqnsgu37nwEOIGsxOoB4LSU0oqqfnENRcTjQEdKqSnqKCPiaGAj8IOU0iGFZV8HXkopXVb442rPlNLf1zPOnVVi+y4FNqaUrqhnbIMhIvYF9k0pLY2INqALOAX4DA2+D3ewbafSBPsvstsVjkkpbYyI4cB9wHnA3wK3pJR+HBH/G1ieUrqmnrGqempxxFzxrTtVXymlJcBbW0F9BLix8PpGsl+GDanE9jWNlNKalNLSwusNwEpgAk2wD3ewbU0hZTYW3g4vPBJwLPCTwvKG3HcqXy0S8wTgqT7vn6aJ/iMVJODOiOgq3OmsGY1PKa2B7JcjsE+d46mGcyLiN4VT3Q13mreYiJgEHAb8mibbh2/ZNmiS/RcRQyNiGfA8sBh4DPhDSumNwpBm/B2qPmqRmIt1kmi2qeB/mlKaStZp6+zCqVI1lmuAA4B2YA1wZX3DGbiI2BVYCJyfUmqqTipFtq1p9l9KqSel1E52N8UjgHcXG1bbqFRLtUjMZd26s5GllJ4tPD8P/JTsP1Ozea5wfa/3Ot/zdY5nUKWUniv8QtwCXEeD78PC9cmFwE0ppVsKi5tiHxbbtmbbfwAppT8AvwLeD+wREb33nWi636HaVi0Sc1PfujMixhQmoRARY4AZNGejjtuAMwqvzwBurWMsg643YRV8lAbeh4UJRNcDK1NKV/X5UcPvw1Lb1iz7LyLGRcQehdejgePJrqPfQ9bRDxp036l8NbnBSKF04ZtsvXXnV6v+pTUSEe8kO0qG7E5q/9zo2xcRPwKmk3XseQ64BPgZcDOwP/Ak8ImUUkNOoCqxfdPJToMm4HHg873XYxtNRBwJ3Av8FthSWDyH7FpsQ+/DHWzbaTTB/ovEB+TIAAAAW0lEQVSI95JN7hpKduB0c0rpfxZ+z/wY2At4EPgfKaXX6xepqsk7f0mSlCPe+UuSpBwxMUuSlCMmZkmScsTELElSjpiYJUnKEROzJEk5YmKWJClHTMySJOXI/wfebDoEcwZOrwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0xd0b0518>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "for i in range(n_clusters):\n",
    "    index = np.nonzero(train_lable==i)[0]\n",
    "    x1 = np.array(train.iloc[index].iloc[:,[0]])\n",
    "    x2 = np.array(train.iloc[index].iloc[:,[1]])\n",
    "    y_i = train_lable[index]\n",
    "    for j in range(len(x1)):\n",
    "        if j < 50:  #每类打印50个\n",
    "            plt.text(x1[j],x2[j],str(int(y_i[j])),color=colors[i],fontdict={'weight': 'bold', 'size': 9}) \n",
    "\n",
    "plt.axis([0,30,0,30])\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
